[llvm] [AMDGPU] Insert copy when only one register can be constrained (PR #161435)
Nico Núñez via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 12:43:57 PDT 2025
niconunezz (https://github.com/niconunezz) updated the pull request https://github.com/llvm/llvm-project/pull/161435
>From 119d5fe36f3b0625afabfb13f7c302a67cc5b7e3 Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Tue, 30 Sep 2025 22:29:26 +0200
Subject: [PATCH 1/8] [AMDGPU] Insert copy when only one register can be
constrained
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 19 +++++++---
.../load-store-opt-ds-regclass-constrain.mir | 37 +++++++++++++++++++
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..21bd75c229042 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,12 +1352,21 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}
- if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
- !MRI->constrainRegClass(Data1->getReg(), DataRC1))
+ bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
+ bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
+ if (!constrainData0 && !constrainData1) {
return nullptr;
-
- // TODO: If one register can be constrained, and not the other, insert a
- // copy.
+ } else if (!constrainData0 || !constrainData1) {
+ MachineBasicBlock::iterator InsertBefore = CI.I;
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+ const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
+ Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
+ .addReg(activeData->getReg(), 0);
+ const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+ }
}
return Where;
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 33f210533e10b..7a505f599254e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -206,5 +206,42 @@ body: |
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
+...
+
+---
+name: ds_write_b32__av32_physical
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: ds_write_b32__av32_physical
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %1:av_32 = COPY $vgpr1
+ DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
+
+---
+name: ds_write_b32__physical_av32
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: ds_write_b32__physical_av32
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %2:av_32 = COPY $vgpr2
+ DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
>From e9f7002f13bc93de12933fdb53b6db50a1e0d73b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:05:33 +0200
Subject: [PATCH 2/8] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 7a505f599254e..0cb91faf088a5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -244,4 +244,5 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%2:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
- DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
+ DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
>From 80619de370adbe44e2347cb0a4c7b4d36f4daf3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:10:31 +0200
Subject: [PATCH 3/8] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 0cb91faf088a5..29d2ca7337b22 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -225,6 +225,7 @@ body: |
%1:av_32 = COPY $vgpr1
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
...
>From b716facf2230cc867b69a2414429525e8f5cd0ce Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Wed, 1 Oct 2025 18:24:33 +0200
Subject: [PATCH 4/8] address suggested changes
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 24 +++++++----
.../load-store-opt-ds-regclass-constrain.mir | 40 +++++++++++++++++++
2 files changed, 56 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 21bd75c229042..4df78b9e946da 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,20 +1352,28 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}
- bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
- bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
- if (!constrainData0 && !constrainData1) {
+ bool canBeConstrainedData0 =
+ MRI->constrainRegClass(Data0->getReg(), DataRC0);
+ bool canBeConstrainedData1 =
+ MRI->constrainRegClass(Data1->getReg(), DataRC1);
+ if (!canBeConstrainedData0 && !canBeConstrainedData1) {
return nullptr;
- } else if (!constrainData0 || !constrainData1) {
+ }
+ if (!canBeConstrainedData0 || !canBeConstrainedData1) {
MachineBasicBlock::iterator InsertBefore = CI.I;
MachineBasicBlock *MBB = CI.I->getParent();
- DebugLoc DL = CI.I->getDebugLoc();
- const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
- Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const DebugLoc &DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+ Paired.I->getDebugLoc());
+ const CombineInfo &ActiveCI = canBeConstrainedData0 ? Paired : CI;
+ MachineOperand *activeData =
+ TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0);
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const TargetRegisterClass *RC = getDataRegClass(*CI.I);
+ Register BaseReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
.addReg(activeData->getReg(), 0);
- const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+
+ activeData->setReg(BaseReg);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 29d2ca7337b22..4a142f1c1dca2 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -247,3 +247,43 @@ body: |
DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
+
+
+---
+name: ds_write_b64__physical_av64
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: ds_write_b64__physical_av64
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+ ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %1:av_64_align2 = COPY $vgpr2_vgpr3
+ DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+ DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+
+
+...
+
+---
+name: ds_write_b64__av64__physical
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: ds_write_b64__av64__physical
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+ %0:vgpr_32 = COPY $vgpr0
+ %2:av_64_align2 = COPY $vgpr4_vgpr5
+ DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+ DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file
>From 181d98fe7134480e93384f7055ef9bad24e29800 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:08:51 +0200
Subject: [PATCH 5/8] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 4a142f1c1dca2..b3d0fc8b50d38 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -286,4 +286,6 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
- DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file
+ DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+ ...
+
\ No newline at end of file
>From dd4252e237e4a270fde7e90760a39da1ecd8182e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:09:15 +0200
Subject: [PATCH 6/8] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index b3d0fc8b50d38..7fa762b181568 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -266,6 +266,7 @@ body: |
%1:av_64_align2 = COPY $vgpr2_vgpr3
DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+...
...
>From b7a84e426e787df93f0875a4f9e7eeb0d52efc75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
<125479151+niconunezz at users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:09:25 +0200
Subject: [PATCH 7/8] Update
llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 7fa762b181568..b29fe4c8e7dc3 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -248,7 +248,6 @@ body: |
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
-
---
name: ds_write_b64__physical_av64
body: |
>From 06c1b3eae06a9f92afcb2d119909f5a9705ceaae Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Fri, 10 Oct 2025 21:31:56 +0200
Subject: [PATCH 8/8] addressed suggested changes
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 3 +
.../load-store-opt-ds-regclass-constrain.mir | 66 ++++---------------
2 files changed, 15 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4df78b9e946da..57d3eadfcc5ef 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,6 +1352,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}
+ if (Data0->getReg().isPhysical() || Data1->getReg().isPhysical()) {
+ return nullptr;
+ }
bool canBeConstrainedData0 =
MRI->constrainRegClass(Data0->getReg(), DataRC0);
bool canBeConstrainedData1 =
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index b29fe4c8e7dc3..34efcadc2951d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -217,18 +217,17 @@ body: |
; CHECK-LABEL: name: ds_write_b32__av32_physical
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+ ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %1:av_32 = COPY $vgpr1
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+
%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr1
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
-...
-
---
name: ds_write_b32__physical_av32
body: |
@@ -238,54 +237,13 @@ body: |
; CHECK-LABEL: name: ds_write_b32__physical_av32
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
- %0:vgpr_32 = COPY $vgpr0
- %2:av_32 = COPY $vgpr2
- DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
- DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
-...
-
----
-name: ds_write_b64__physical_av64
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %1:av_32 = COPY $vgpr2
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
- ; CHECK-LABEL: name: ds_write_b64__physical_av64
- ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
- %1:av_64_align2 = COPY $vgpr2_vgpr3
- DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
- DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3)
-...
-
-
+ %1:av_32 = COPY $vgpr2
+ DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+ DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
-
----
-name: ds_write_b64__av64__physical
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
-
- ; CHECK-LABEL: name: ds_write_b64__av64__physical
- ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
- %0:vgpr_32 = COPY $vgpr0
- %2:av_64_align2 = COPY $vgpr4_vgpr5
- DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
- DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
- ...
-
\ No newline at end of file
More information about the llvm-commits mailing list