[llvm] ed38d67 - PeepholeOpt: Handle subregister compose when looking through reg_sequence (#127051)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 17:07:33 PST 2025
Author: Matt Arsenault
Date: 2025-02-18T08:07:29+07:00
New Revision: ed38d6702f7695092c9486016e2504f8c6bfef37
URL: https://github.com/llvm/llvm-project/commit/ed38d6702f7695092c9486016e2504f8c6bfef37
DIFF: https://github.com/llvm/llvm-project/commit/ed38d6702f7695092c9486016e2504f8c6bfef37.diff
LOG: PeepholeOpt: Handle subregister compose when looking through reg_sequence (#127051)
Previously this would give up on folding subregister copies through
a reg_sequence if the input operand already had a subregister index.
d246cc618adc52fdbd69d44a2a375c8af97b6106 stopped introducing these
subregister uses, and this is the first step to lifting that restriction.
I was expecting to be able to implement this purely with compose /
reverse compose, but I wasn't able to make that work, so this instead
tests the lane masks for whether the copy reads a subset of the input.
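As a rough sketch of the new path (a toy standalone program, not the
LLVM API; the lane masks and subregister indices below are made-up
stand-ins for the target-defined data that TRI->getSubRegIndexLaneMask
and the compose helpers provide):

#include <cassert>
#include <cstdint>

int main() {
  // Toy lane masks, one bit per 32-bit lane: sub0=0b0001, sub1=0b0010,
  // sub0_sub1=0b0011, sub2_sub3=0b1100.
  uint64_t DefMask = 0b0010;       // the copy reads Def.sub1
  uint64_t ThisOpRegMask = 0b0011; // this input defines sub0_sub1

  // Step 1: the copy must read a subset of this single input.
  assert((DefMask & ThisOpRegMask) == DefMask);

  // Step 2: reverse-compose to find where the read lands inside the
  // input operand: within sub0_sub1, Def.sub1 is the input's sub1.
  // Step 3: compose with the input's own subregister index. If the
  // operand was %x.sub2_sub3, composing sub2_sub3 with sub1 gives
  // sub3, so COPY %rs.sub1 can be rewritten as COPY %x.sub3, matching
  // the MIR test updates below.
  return 0;
}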
Added:
Modified:
llvm/lib/CodeGen/PeepholeOptimizer.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 745c0d4b36a62..24bd9938bc45c 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1984,12 +1984,43 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
// We are looking at:
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
- // Check if one of the operand defines the subreg we are interested in.
+ //
+ // Check if one of the operands exactly defines the subreg we are interested
+ // in.
for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
if (RegSeqInput.SubIdx == DefSubReg)
return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
}
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
+ // If we did not find an exact match, see if we can do a composition to
+ // extract a sub-subregister.
+ for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
+ // We don't check if the resulting class supports the subregister index
+ // yet. This will occur before any rewrite when looking for an eligible
+ // source.
+
+ LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
+ LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
+
+ // Check that this extract reads a subset of this single reg_sequence input.
+ //
+ // FIXME: We should be able to filter this in terms of the indexes directly
+ // without checking the lanemasks.
+ if ((DefMask & ThisOpRegMask) != DefMask)
+ continue;
+
+ unsigned ReverseDefCompose =
+ TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
+ if (!ReverseDefCompose)
+ continue;
+
+ unsigned ComposedDefInSrcReg1 =
+ TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
+ return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
+ }
+
// If the subreg we are tracking is super-defined by another subreg,
// we could follow this value. However, this would require to compose
// the subreg and we do not do that for now.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index f2a4332bcb8ba..c136028f2de43 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -2872,8 +2872,8 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14
-; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v1, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v7f64_v_v:
@@ -2898,8 +2898,8 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v7f64_v_v:
@@ -2918,7 +2918,7 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v0 :: v_dual_cndmask_b32 v1, v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <7 x double> %vec, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
index 3eb9d474ec030..f961e857f39e5 100644
--- a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
@@ -79,9 +79,9 @@ define void @issue92561(ptr addrspace(1) %arg) {
; GISEL: ; %bb.0: ; %bb
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_clause 0x1
-; GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off
-; GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:16
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
+; GISEL-NEXT: v_mov_b32_e32 v8, 0
; GISEL-NEXT: s_mov_b32 s20, 0
; GISEL-NEXT: s_mov_b32 s3, exec_lo
; GISEL-NEXT: s_mov_b32 s21, s20
@@ -97,19 +97,19 @@ define void @issue92561(ptr addrspace(1) %arg) {
; GISEL-NEXT: s_mov_b32 s11, s20
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_readfirstlane_b32 s12, v2
-; GISEL-NEXT: v_readfirstlane_b32 s13, v3
-; GISEL-NEXT: v_readfirstlane_b32 s14, v4
-; GISEL-NEXT: v_readfirstlane_b32 s15, v5
-; GISEL-NEXT: v_readfirstlane_b32 s16, v6
-; GISEL-NEXT: v_readfirstlane_b32 s17, v7
-; GISEL-NEXT: v_readfirstlane_b32 s18, v8
-; GISEL-NEXT: v_readfirstlane_b32 s19, v9
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[12:13], v[2:3]
-; GISEL-NEXT: v_cmp_eq_u64_e64 s0, s[14:15], v[4:5]
-; GISEL-NEXT: v_cmp_eq_u64_e64 s1, s[16:17], v[6:7]
+; GISEL-NEXT: v_readfirstlane_b32 s12, v4
+; GISEL-NEXT: v_readfirstlane_b32 s13, v5
+; GISEL-NEXT: v_readfirstlane_b32 s14, v6
+; GISEL-NEXT: v_readfirstlane_b32 s15, v7
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_readfirstlane_b32 s18, v2
+; GISEL-NEXT: v_readfirstlane_b32 s19, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[12:13], v[4:5]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s0, s[14:15], v[6:7]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s1, s[16:17], v[0:1]
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GISEL-NEXT: v_cmp_eq_u64_e64 s2, s[18:19], v[8:9]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s2, s[18:19], v[2:3]
; GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_and_b32 s0, s0, s1
@@ -117,29 +117,31 @@ define void @issue92561(ptr addrspace(1) %arg) {
; GISEL-NEXT: s_and_b32 s0, s0, s2
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-NEXT: s_and_saveexec_b32 s0, s0
-; GISEL-NEXT: image_sample_c_lz v1, [v0, v0, v0, v0], s[12:19], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: image_sample_c_lz v9, [v8, v8, v8, v8], s[12:19], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT: ; implicit-def: $vgpr8
; GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GISEL-NEXT: s_cbranch_execnz .LBB0_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b32 exec_lo, s3
-; GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0
-; GISEL-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7fc00000
+; GISEL-NEXT: v_mov_b32_e32 v2, 1.0
; GISEL-NEXT: s_clause 0x2
-; GISEL-NEXT: image_sample_c_lz v0, [v2, v2, v0, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: image_sample_c_lz v3, [v2, v3, v2, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: image_sample_c_lz v4, [v2, v2, v2, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v0, [v1, v1, v0, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v2, [v1, v2, v1, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v3, [v1, v1, v1, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GISEL-NEXT: s_waitcnt vmcnt(2)
-; GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GISEL-NEXT: v_add_f32_e32 v0, v9, v0
; GISEL-NEXT: s_waitcnt vmcnt(1)
-; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GISEL-NEXT: v_dual_add_f32 v0, v3, v0 :: v_dual_mov_b32 v3, v2
+; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GISEL-NEXT: v_add_f32_e32 v0, v2, v0
+; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_add_f32_e32 v0, v4, v0
+; GISEL-NEXT: v_add_f32_e32 v0, v3, v0
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x3e800000, v0
-; GISEL-NEXT: image_store v[1:3], [v2, v2], s[4:11] dim:SQ_RSRC_IMG_2D unorm
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x3e800000, v0
+; GISEL-NEXT: image_store v[0:2], [v1, v1], s[4:11] dim:SQ_RSRC_IMG_2D unorm
; GISEL-NEXT: s_setpc_b64 s[30:31]
bb:
%descriptor = load <8 x i32>, ptr addrspace(1) %arg, align 32
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
index ea8e2edb80c7e..6d2f4e76840ae 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
@@ -162,7 +162,7 @@ body: |
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY4]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -189,7 +189,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub0, [[COPY1]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -212,7 +212,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub0, [[COPY1]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -285,7 +285,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[V_MOV_B32_e32_]], %subreg.sub2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:vreg_64 = COPY $vgpr1_vgpr2
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -311,8 +311,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
@@ -340,7 +340,7 @@ body: |
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2, [[V_MOV_B32_e32_1]], %subreg.sub3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -367,8 +367,8 @@ body: |
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2, [[V_MOV_B32_e32_1]], %subreg.sub3
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -420,7 +420,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -444,7 +444,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -468,7 +468,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -492,7 +492,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -516,7 +516,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -540,7 +540,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub0_sub1, %subreg.sub2_sub3, [[COPY]].sub2_sub3, %subreg.sub0_sub1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -564,7 +564,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub0_sub1, %subreg.sub2_sub3, [[COPY]].sub2_sub3, %subreg.sub0_sub1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -588,7 +588,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub1, %subreg.sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_64 = COPY $vgpr5_vgpr6
@@ -615,9 +615,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub1, %subreg.sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
@@ -650,12 +650,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub4_sub5, [[COPY]].sub1_sub2, %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub4
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]]
%0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
%1:vreg_64 = COPY $vgpr5_vgpr6