[llvm] [AMDGPU] Insert copy when only one register can be constrained (PR #161435)
Nico Núñez via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 11:09:02 PDT 2025
https://github.com/niconunezz updated https://github.com/llvm/llvm-project/pull/161435
>From 6683837fbf06467f4f70003291f0724502ab709a Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Tue, 30 Sep 2025 22:29:26 +0200
Subject: [PATCH 1/5] [AMDGPU] Insert copy when only one register can be
constrained
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 19 +++++++---
.../load-store-opt-ds-regclass-constrain.mir | 37 +++++++++++++++++++
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..21bd75c229042 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,12 +1352,21 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}
- if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
- !MRI->constrainRegClass(Data1->getReg(), DataRC1))
+ bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
+ bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
+ if (!constrainData0 && !constrainData1) {
return nullptr;
-
- // TODO: If one register can be constrained, and not the other, insert a
- // copy.
+ } else if (!constrainData0 || !constrainData1) {
+ MachineBasicBlock::iterator InsertBefore = CI.I;
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+ const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
+ Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
+ .addReg(activeData->getReg(), 0);
+ const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+ }
}
return Where;
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 33f210533e10b..7a505f599254e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -206,5 +206,42 @@ body: |
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
+...
+
+---
+name: ds_write_b32__av32_physical
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: ds_write_b32__av32_physical
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %1:av_32 = COPY $vgpr1
+ DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
+
+---
+name: ds_write_b32__physical_av32
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: ds_write_b32__physical_av32
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %2:av_32 = COPY $vgpr2
+ DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
>From d329dbd961dde8b02489fb91d6dd7f0826bd8603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:05:33 +0200
Subject: [PATCH 2/5] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 7a505f599254e..0cb91faf088a5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -244,4 +244,5 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%2:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
- DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
+ DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
>From d6695fdc9652c182f6ab2e52041bd1ea68563de5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:10:31 +0200
Subject: [PATCH 3/5] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 0cb91faf088a5..29d2ca7337b22 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -225,6 +225,7 @@ body: |
%1:av_32 = COPY $vgpr1
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
...
>From 1381d104c8dcc0026a677ff56f3f31f48119054f Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Wed, 1 Oct 2025 18:24:33 +0200
Subject: [PATCH 4/5] address suggested changes
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 24 +++++++----
.../load-store-opt-ds-regclass-constrain.mir | 40 +++++++++++++++++++
2 files changed, 56 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 21bd75c229042..4df78b9e946da 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,20 +1352,28 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}
- bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
- bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
- if (!constrainData0 && !constrainData1) {
+ bool canBeConstrainedData0 =
+ MRI->constrainRegClass(Data0->getReg(), DataRC0);
+ bool canBeConstrainedData1 =
+ MRI->constrainRegClass(Data1->getReg(), DataRC1);
+ if (!canBeConstrainedData0 && !canBeConstrainedData1) {
return nullptr;
- } else if (!constrainData0 || !constrainData1) {
+ }
+ if (!canBeConstrainedData0 || !canBeConstrainedData1) {
MachineBasicBlock::iterator InsertBefore = CI.I;
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
- const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
- Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const DebugLoc &DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
+ const CombineInfo &ActiveCI = canBeConstrainedData0 ? Paired : CI;
+ MachineOperand *activeData =
+ TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0);
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const TargetRegisterClass *RC = getDataRegClass(*CI.I);
+ Register BaseReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
.addReg(activeData->getReg(), 0);
- const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+
+ activeData->setReg(BaseReg);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 29d2ca7337b22..4a142f1c1dca2 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -247,3 +247,43 @@ body: |
DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
+
+
+---
+name: ds_write_b64__physical_av64
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: ds_write_b64__physical_av64
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+ ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %1:av_64_align2 = COPY $vgpr2_vgpr3
+ DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+ DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+
+
+...
+
+---
+name: ds_write_b64__av64__physical
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: ds_write_b64__av64__physical
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %2:av_64_align2 = COPY $vgpr4_vgpr5
+ DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+ DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file
>From f528451a83207827f48002ccd1b2043f37d36088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:08:51 +0200
Subject: [PATCH 5/5] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 4a142f1c1dca2..b3d0fc8b50d38 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -286,4 +286,6 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
- DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file
+ DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+ ...
+
\ No newline at end of file
More information about the llvm-commits mailing list