[llvm] PeepholeOpt: Do not skip reg_sequence sources with subregs (PR #125667)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 03:04:10 PST 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/125667
>From f901e1c9b4879d836fa12bbf9b1985398507d48e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 27 Jan 2025 19:31:41 +0700
Subject: [PATCH 1/2] PeepholeOpt: Do not skip reg_sequence sources with
subregs
Contrary to the removed comment, this particular code is not responsible
for handling any sub-register index composition that may be required, and
unhandled cases are already rejected later. Lift this restriction so that
composes and reg_sequence sources with subregisters can be handled later.
---
llvm/lib/CodeGen/PeepholeOptimizer.cpp | 4 +---
llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir | 6 +++---
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index cced54fa72819..0e1c65c98544e 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -406,9 +406,7 @@ class RegSequenceRewriter : public Rewriter {
const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
Src.Reg = MOInsertedReg.getReg();
- // If we have to compose sub-register indices, bail out.
- if ((Src.SubReg = MOInsertedReg.getSubReg()))
- return false;
+ Src.SubReg = MOInsertedReg.getSubReg();
// We want to track something that is compatible with the related
// partial definition.
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
index e1ff42125ce9a..333f7de921c24 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
@@ -22,9 +22,9 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub0, %subreg.sub1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
; GCN-NEXT: KILL [[COPY3]], implicit [[COPY2]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
>From d535a0ada218690b39811bcde51c86604f50d845 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 6 Mar 2025 17:45:09 +0700
Subject: [PATCH 2/2] Add some more tests
---
.../AMDGPU/peephole-opt-regseq-removal.mir | 46 +++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
index 333f7de921c24..f1f2eb6baf008 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir
@@ -34,3 +34,49 @@ body: |
%5:vgpr_32 = COPY %3.sub1
KILL implicit %4, %5
...
+
+---
+name: reg_sequence_removal_2
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; GCN-LABEL: name: reg_sequence_removal_2
+ ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub0, %subreg.sub3
+ ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub2, %subreg.sub1, [[REG_SEQUENCE]].sub3, %subreg.sub2, [[COPY1]].sub0, %subreg.sub3
+ ; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]]
+ %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ %2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1
+ %3:vreg_128 = REG_SEQUENCE %2.sub1_sub2_sub3, %subreg.sub0_sub1_sub2, %1.sub0, %subreg.sub3
+ %4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3
+ KILL implicit %4
+...
+
+---
+name: reg_sequence_removal_3
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; GCN-LABEL: name: reg_sequence_removal_3
+ ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub2_sub3, %subreg.sub2_sub3, [[COPY]].sub0, %subreg.sub1, [[COPY]].sub1, %subreg.sub0
+ ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1, %subreg.sub0, [[COPY]].sub0, %subreg.sub1, [[COPY1]].sub2, %subreg.sub2, [[COPY1]].sub3, %subreg.sub3
+ ; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]]
+ %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ %2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1
+ %3:vreg_128 = REG_SEQUENCE %2.sub0_sub1, %subreg.sub2_sub3, %2.sub2, %subreg.sub1, %2.sub3, %subreg.sub0
+ %4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3
+ KILL implicit %4
+...
More information about the llvm-commits mailing list