[llvm] CodeGen: Treat subreg-to-subreg copies as isFullCopyInstr (PR #120056)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 01:23:55 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/120056
This enables better copy folding during register allocation.
I'm assuming the intent of this function is to identify copies
that do not change the register width (i.e. neither a subregister
insert nor an extract). Permit copies whose source and destination use
the same subregister index, since this should result in a full copy
for the final allocated copy.
This should probably be more permissive still. I think it should
accept any case where getSubRegisterClass is the same for both
subregister indexes.
From 3ea36fb381038e7be179ac40ff65a9d03df9a07b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 16 Dec 2024 15:27:40 +0700
Subject: [PATCH] CodeGen: Treat subreg-to-subreg copies as isFullCopyInstr
This enables better copy folding during register allocation.
I'm assuming the intent of this function is to identify copies
that do not change the register width (i.e. neither a subregister
insert nor an extract). Permit copies whose source and destination use
the same subregister index, since this should result in a full copy
for the final allocated copy.
This should probably be more permissive still. I think it should
accept any case where getSubRegisterClass is the same for both
subregister indexes.
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 2 +-
llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 3 +-
.../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 44 +-
.../AMDGPU/splitkit-copy-live-lanes.mir | 447 +++++++-----------
.../CodeGen/AVR/inline-asm/inline-asm3.ll | 2 +-
.../RISCV/rvv/fixed-vectors-trunc-vp.ll | 3 +-
llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 27 +-
llvm/test/CodeGen/Thumb2/mve-vst3.ll | 100 ++--
llvm/test/CodeGen/Thumb2/mve-vst4.ll | 12 +-
9 files changed, 249 insertions(+), 391 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 408adcd330b846..0e8731a9c405d0 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1109,7 +1109,7 @@ class TargetInstrInfo : public MCInstrInfo {
const MachineOperand *DestRegOp = DestSrc->Destination;
const MachineOperand *SrcRegOp = DestSrc->Source;
- return !DestRegOp->getSubReg() && !SrcRegOp->getSubReg();
+ return DestRegOp->getSubReg() == SrcRegOp->getSubReg();
}
/// If the specific machine instruction is an instruction that adds an
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 64f1f45bf734cf..e246b44611ae4e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -7274,7 +7274,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v15
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v17
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v20
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v20
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v16
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14
@@ -7289,7 +7289,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v18
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v20
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v5
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v19
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v19
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v21
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
index 8f53ec2f992dac..d3523587b58941 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -272,38 +272,24 @@ body: |
; RA-NEXT: internal [[COPY]].sub13:sgpr_512 = COPY [[DEF2]].sub13
; RA-NEXT: internal [[COPY]].sub14:sgpr_512 = COPY [[DEF2]].sub14
; RA-NEXT: }
- ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[COPY]].sub4_sub5 {
- ; RA-NEXT: internal [[COPY1]].sub10_sub11:sgpr_512 = COPY [[COPY]].sub10_sub11
- ; RA-NEXT: internal [[COPY1]].sub7:sgpr_512 = COPY [[COPY]].sub7
- ; RA-NEXT: internal [[COPY1]].sub8:sgpr_512 = COPY [[COPY]].sub8
- ; RA-NEXT: internal [[COPY1]].sub13:sgpr_512 = COPY [[COPY]].sub13
- ; RA-NEXT: internal [[COPY1]].sub14:sgpr_512 = COPY [[COPY]].sub14
- ; RA-NEXT: }
- ; RA-NEXT: SI_SPILL_S512_SAVE [[COPY1]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
+ ; RA-NEXT: SI_SPILL_S512_SAVE [[COPY]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
- ; RA-NEXT: undef [[COPY2:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
- ; RA-NEXT: internal [[COPY2]].sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
- ; RA-NEXT: internal [[COPY2]].sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
- ; RA-NEXT: internal [[COPY2]].sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8
- ; RA-NEXT: internal [[COPY2]].sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
- ; RA-NEXT: internal [[COPY2]].sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
- ; RA-NEXT: }
- ; RA-NEXT: undef [[COPY3:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[COPY2]].sub4_sub5 {
- ; RA-NEXT: internal [[COPY3]].sub10_sub11:sgpr_512 = COPY [[COPY2]].sub10_sub11
- ; RA-NEXT: internal [[COPY3]].sub7:sgpr_512 = COPY [[COPY2]].sub7
- ; RA-NEXT: internal [[COPY3]].sub8:sgpr_512 = COPY [[COPY2]].sub8
- ; RA-NEXT: internal [[COPY3]].sub13:sgpr_512 = COPY [[COPY2]].sub13
- ; RA-NEXT: internal [[COPY3]].sub14:sgpr_512 = COPY [[COPY2]].sub14
+ ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
+ ; RA-NEXT: internal [[COPY1]].sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
+ ; RA-NEXT: internal [[COPY1]].sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
+ ; RA-NEXT: internal [[COPY1]].sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8
+ ; RA-NEXT: internal [[COPY1]].sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13
+ ; RA-NEXT: internal [[COPY1]].sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14
; RA-NEXT: }
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub4, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub5, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub10, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub11, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub7, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub8, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub13, 0 :: (dereferenceable invariant load (s32))
- ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY3]].sub14, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub4, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub5, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub10, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub11, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub7, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub8, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub13, 0 :: (dereferenceable invariant load (s32))
+ ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], [[COPY1]].sub14, 0 :: (dereferenceable invariant load (s32))
; RA-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]]
;
; VR-LABEL: name: splitkit_copy_unbundle_reorder
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
index 42db92b15acf50..2e6e15faa8737c 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
@@ -112,94 +112,64 @@ body: |
; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY17]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5)
; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_8]].sub2
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec
- ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_128 = COPY [[COPY18]].sub0 {
- ; CHECK-NEXT: internal [[COPY19]].sub2:vreg_128 = COPY [[COPY18]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_9]].sub2
- ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
- ; CHECK-NEXT: undef [[COPY21:%[0-9]+]].sub0:vreg_128 = COPY [[COPY20]].sub0 {
- ; CHECK-NEXT: internal [[COPY21]].sub2:vreg_128 = COPY [[COPY20]].sub2
- ; CHECK-NEXT: }
+ ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_9]].sub2
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec
- ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub0 {
- ; CHECK-NEXT: internal [[COPY22]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY23:%[0-9]+]].sub0:vreg_128 = COPY [[COPY22]].sub0 {
- ; CHECK-NEXT: internal [[COPY23]].sub2:vreg_128 = COPY [[COPY22]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY23]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY24:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_11]].sub2
- ; CHECK-NEXT: [[COPY24:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
- ; CHECK-NEXT: undef [[COPY25:%[0-9]+]].sub0:vreg_128 = COPY [[COPY24]].sub0 {
- ; CHECK-NEXT: internal [[COPY25]].sub2:vreg_128 = COPY [[COPY24]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY25]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY26:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub2
- ; CHECK-NEXT: [[COPY26:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
- ; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub0:vreg_128 = COPY [[COPY26]].sub0 {
- ; CHECK-NEXT: internal [[COPY27]].sub2:vreg_128 = COPY [[COPY26]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY27]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY28:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_13]].sub2
- ; CHECK-NEXT: [[COPY28:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_12:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
+ ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_13]].sub2
+ ; CHECK-NEXT: undef [[COPY21:%[0-9]+]].sub2:vreg_128 = COPY [[COPY20]].sub2
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
+ ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0:vreg_128 = COPY [[COPY21]].sub0 {
+ ; CHECK-NEXT: internal [[COPY22]].sub2:vreg_128 = COPY [[COPY21]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY22]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY23:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_14]].sub2
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
+ ; CHECK-NEXT: undef [[COPY24:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_15]].sub2
+ ; CHECK-NEXT: undef [[COPY25:%[0-9]+]].sub2:vreg_128 = COPY [[COPY24]].sub2
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
+ ; CHECK-NEXT: undef [[COPY26:%[0-9]+]].sub0:vreg_128 = COPY [[COPY25]].sub0 {
+ ; CHECK-NEXT: internal [[COPY26]].sub2:vreg_128 = COPY [[COPY25]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY26]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_16]].sub2
+ ; CHECK-NEXT: undef [[COPY28:%[0-9]+]].sub2:vreg_128 = COPY [[COPY27]].sub2
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
; CHECK-NEXT: undef [[COPY29:%[0-9]+]].sub0:vreg_128 = COPY [[COPY28]].sub0 {
; CHECK-NEXT: internal [[COPY29]].sub2:vreg_128 = COPY [[COPY28]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY30:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_14]].sub2
- ; CHECK-NEXT: [[COPY30:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
- ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub0:vreg_128 = COPY [[COPY30]].sub0 {
- ; CHECK-NEXT: internal [[COPY31]].sub2:vreg_128 = COPY [[COPY30]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY31]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY32:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_15]].sub2
- ; CHECK-NEXT: [[COPY32:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
- ; CHECK-NEXT: undef [[COPY33:%[0-9]+]].sub0:vreg_128 = COPY [[COPY32]].sub0 {
- ; CHECK-NEXT: internal [[COPY33]].sub2:vreg_128 = COPY [[COPY32]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY34:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_16]].sub2
- ; CHECK-NEXT: [[COPY34:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY29]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY30:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_17]].sub2
+ ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub2:vreg_128 = COPY [[COPY30]].sub2
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
+ ; CHECK-NEXT: undef [[COPY32:%[0-9]+]].sub0:vreg_128 = COPY [[COPY31]].sub0 {
+ ; CHECK-NEXT: internal [[COPY32]].sub2:vreg_128 = COPY [[COPY31]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY32]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY33:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_18]].sub2
+ ; CHECK-NEXT: undef [[COPY34:%[0-9]+]].sub2:vreg_128 = COPY [[COPY33]].sub2
+ ; CHECK-NEXT: [[COPY34:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
; CHECK-NEXT: undef [[COPY35:%[0-9]+]].sub0:vreg_128 = COPY [[COPY34]].sub0 {
; CHECK-NEXT: internal [[COPY35]].sub2:vreg_128 = COPY [[COPY34]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY36:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_17]].sub2
- ; CHECK-NEXT: [[COPY36:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
- ; CHECK-NEXT: undef [[COPY37:%[0-9]+]].sub0:vreg_128 = COPY [[COPY36]].sub0 {
- ; CHECK-NEXT: internal [[COPY37]].sub2:vreg_128 = COPY [[COPY36]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY38:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_18]].sub2
- ; CHECK-NEXT: [[COPY38:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
- ; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub0:vreg_128 = COPY [[COPY38]].sub0 {
- ; CHECK-NEXT: internal [[COPY39]].sub2:vreg_128 = COPY [[COPY38]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY40:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_19]].sub2
- ; CHECK-NEXT: [[COPY40:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
- ; CHECK-NEXT: undef [[COPY41:%[0-9]+]].sub0:vreg_128 = COPY [[COPY40]].sub0 {
- ; CHECK-NEXT: internal [[COPY41]].sub2:vreg_128 = COPY [[COPY40]].sub2
- ; CHECK-NEXT: }
+ ; CHECK-NEXT: undef [[COPY36:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_19]].sub2
+ ; CHECK-NEXT: [[COPY36:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_27:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
- ; CHECK-NEXT: undef [[COPY42:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_27]].sub2
- ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
- ; CHECK-NEXT: undef [[COPY43:%[0-9]+]].sub0:vreg_128 = COPY [[COPY42]].sub0 {
- ; CHECK-NEXT: internal [[COPY43]].sub2:vreg_128 = COPY [[COPY42]].sub2
- ; CHECK-NEXT: }
+ ; CHECK-NEXT: undef [[COPY37:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_27]].sub2
+ ; CHECK-NEXT: [[COPY37:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_28:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
- ; CHECK-NEXT: undef [[COPY44:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_28]].sub2
- ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
- ; CHECK-NEXT: undef [[COPY45:%[0-9]+]].sub0:vreg_128 = COPY [[COPY44]].sub0 {
- ; CHECK-NEXT: internal [[COPY45]].sub2:vreg_128 = COPY [[COPY44]].sub2
- ; CHECK-NEXT: }
+ ; CHECK-NEXT: undef [[COPY38:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_28]].sub2
+ ; CHECK-NEXT: [[COPY38:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_29:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
- ; CHECK-NEXT: undef [[COPY46:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_29]].sub2
- ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
- ; CHECK-NEXT: undef [[COPY47:%[0-9]+]].sub0:vreg_128 = COPY [[COPY46]].sub0 {
- ; CHECK-NEXT: internal [[COPY47]].sub2:vreg_128 = COPY [[COPY46]].sub2
- ; CHECK-NEXT: }
+ ; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_29]].sub2
+ ; CHECK-NEXT: [[COPY39:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_30:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
- ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_30]].sub2
- ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
+ ; CHECK-NEXT: undef [[COPY40:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_30]].sub2
+ ; CHECK-NEXT: [[COPY40:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_31:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
- ; CHECK-NEXT: undef [[COPY49:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_31]].sub2
- ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
+ ; CHECK-NEXT: undef [[COPY41:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_31]].sub2
+ ; CHECK-NEXT: [[COPY41:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
@@ -228,237 +198,162 @@ body: |
; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_20]], [[S_MOV_B32_]], 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
- ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub0:vreg_128 = COPY [[COPY49]].sub0 {
- ; CHECK-NEXT: internal [[COPY50]].sub2:vreg_128 = COPY [[COPY49]].sub2
+ ; CHECK-NEXT: undef [[COPY42:%[0-9]+]].sub0:vreg_128 = COPY [[COPY41]].sub0 {
+ ; CHECK-NEXT: internal [[COPY42]].sub2:vreg_128 = COPY [[COPY41]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY42]], [[S_MOV_B32_]], 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: undef [[COPY43:%[0-9]+]].sub0:vreg_128 = COPY [[COPY40]].sub0 {
+ ; CHECK-NEXT: internal [[COPY43]].sub2:vreg_128 = COPY [[COPY40]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY43:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY43:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY43]], [[S_MOV_B32_]], 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY44:%[0-9]+]].sub0:vreg_128 = COPY [[COPY39]].sub0 {
+ ; CHECK-NEXT: internal [[COPY44]].sub2:vreg_128 = COPY [[COPY39]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY44]], [[S_MOV_B32_]], 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: undef [[COPY45:%[0-9]+]].sub0:vreg_128 = COPY [[COPY38]].sub0 {
+ ; CHECK-NEXT: internal [[COPY45]].sub2:vreg_128 = COPY [[COPY38]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY45:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY45:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY45]], [[S_MOV_B32_]], 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY46:%[0-9]+]].sub0:vreg_128 = COPY [[COPY37]].sub0 {
+ ; CHECK-NEXT: internal [[COPY46]].sub2:vreg_128 = COPY [[COPY37]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY46]], [[S_MOV_B32_]], 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: undef [[COPY47:%[0-9]+]].sub0:vreg_128 = COPY [[COPY36]].sub0 {
+ ; CHECK-NEXT: internal [[COPY47]].sub2:vreg_128 = COPY [[COPY36]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY47]], [[S_MOV_B32_]], 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub0:vreg_128 = COPY [[COPY35]].sub0 {
+ ; CHECK-NEXT: internal [[COPY48]].sub2:vreg_128 = COPY [[COPY35]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY48]], [[S_MOV_B32_]], 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY49:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 {
+ ; CHECK-NEXT: internal [[COPY49]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY49]], [[S_MOV_B32_]], 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 {
+ ; CHECK-NEXT: internal [[COPY50]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY50:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[COPY50:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY50]], [[S_MOV_B32_]], 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY51:%[0-9]+]].sub0:vreg_128 = COPY [[COPY48]].sub0 {
- ; CHECK-NEXT: internal [[COPY51]].sub2:vreg_128 = COPY [[COPY48]].sub2
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY50]], [[S_MOV_B32_]], 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY51:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 {
+ ; CHECK-NEXT: internal [[COPY51]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY51:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[COPY51:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY51]], [[S_MOV_B32_]], 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: undef [[COPY52:%[0-9]+]].sub0:vreg_128 = COPY [[COPY47]].sub0 {
- ; CHECK-NEXT: internal [[COPY52]].sub2:vreg_128 = COPY [[COPY47]].sub2
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY51]], [[S_MOV_B32_]], 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY52:%[0-9]+]].sub0:vreg_128 = COPY [[COPY23]].sub0 {
+ ; CHECK-NEXT: internal [[COPY52]].sub2:vreg_128 = COPY [[COPY23]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY53:%[0-9]+]].sub0:vreg_128 = COPY [[COPY52]].sub0 {
- ; CHECK-NEXT: internal [[COPY53]].sub2:vreg_128 = COPY [[COPY52]].sub2
+ ; CHECK-NEXT: [[COPY52:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY52:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY52]], [[S_MOV_B32_]], 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY53:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 {
+ ; CHECK-NEXT: internal [[COPY53]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY53:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[COPY53:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY53]], [[S_MOV_B32_]], 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY54:%[0-9]+]].sub0:vreg_128 = COPY [[COPY45]].sub0 {
- ; CHECK-NEXT: internal [[COPY54]].sub2:vreg_128 = COPY [[COPY45]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY55:%[0-9]+]].sub0:vreg_128 = COPY [[COPY54]].sub0 {
- ; CHECK-NEXT: internal [[COPY55]].sub2:vreg_128 = COPY [[COPY54]].sub2
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY53]], [[S_MOV_B32_]], 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY54:%[0-9]+]].sub0:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub0 {
+ ; CHECK-NEXT: internal [[COPY54]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub2
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[COPY54:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY54:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY54]], [[S_MOV_B32_]], 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_11]], [[S_MOV_B32_]], 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_10]], [[S_MOV_B32_]], 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: undef [[COPY55:%[0-9]+]].sub0:vreg_128 = COPY [[COPY19]].sub0 {
+ ; CHECK-NEXT: internal [[COPY55]].sub2:vreg_128 = COPY [[COPY19]].sub2
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY55:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[COPY55:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY55]], [[S_MOV_B32_]], 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
- ; CHECK-NEXT: undef [[COPY56:%[0-9]+]].sub0:vreg_128 = COPY [[COPY43]].sub0 {
- ; CHECK-NEXT: internal [[COPY56]].sub2:vreg_128 = COPY [[COPY43]].sub2
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY55]], [[S_MOV_B32_]], 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
+ ; CHECK-NEXT: undef [[COPY56:%[0-9]+]].sub0:vreg_128 = COPY [[COPY18]].sub0 {
+ ; CHECK-NEXT: internal [[COPY56]].sub2:vreg_128 = COPY [[COPY18]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY57:%[0-9]+]].sub0:vreg_128 = COPY [[COPY56]].sub0 {
- ; CHECK-NEXT: internal [[COPY57]].sub2:vreg_128 = COPY [[COPY56]].sub2
+ ; CHECK-NEXT: [[COPY56:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY56:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY56]], [[S_MOV_B32_]], 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
+ ; CHECK-NEXT: undef [[COPY57:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 {
+ ; CHECK-NEXT: internal [[COPY57]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY57:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
; CHECK-NEXT: [[COPY57:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY57]], [[S_MOV_B32_]], 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY58:%[0-9]+]].sub0:vreg_128 = COPY [[COPY41]].sub0 {
- ; CHECK-NEXT: internal [[COPY58]].sub2:vreg_128 = COPY [[COPY41]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY59:%[0-9]+]].sub0:vreg_128 = COPY [[COPY58]].sub0 {
- ; CHECK-NEXT: internal [[COPY59]].sub2:vreg_128 = COPY [[COPY58]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY59]], [[S_MOV_B32_]], 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: undef [[COPY60:%[0-9]+]].sub0:vreg_128 = COPY [[COPY39]].sub0 {
- ; CHECK-NEXT: internal [[COPY60]].sub2:vreg_128 = COPY [[COPY39]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY61:%[0-9]+]].sub0:vreg_128 = COPY [[COPY60]].sub0 {
- ; CHECK-NEXT: internal [[COPY61]].sub2:vreg_128 = COPY [[COPY60]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY61]], [[S_MOV_B32_]], 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY62:%[0-9]+]].sub0:vreg_128 = COPY [[COPY37]].sub0 {
- ; CHECK-NEXT: internal [[COPY62]].sub2:vreg_128 = COPY [[COPY37]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY63:%[0-9]+]].sub0:vreg_128 = COPY [[COPY62]].sub0 {
- ; CHECK-NEXT: internal [[COPY63]].sub2:vreg_128 = COPY [[COPY62]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY63]], [[S_MOV_B32_]], 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1)
- ; CHECK-NEXT: undef [[COPY64:%[0-9]+]].sub0:vreg_128 = COPY [[COPY35]].sub0 {
- ; CHECK-NEXT: internal [[COPY64]].sub2:vreg_128 = COPY [[COPY35]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY65:%[0-9]+]].sub0:vreg_128 = COPY [[COPY64]].sub0 {
- ; CHECK-NEXT: internal [[COPY65]].sub2:vreg_128 = COPY [[COPY64]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY65]], [[S_MOV_B32_]], 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY66:%[0-9]+]].sub0:vreg_128 = COPY [[COPY33]].sub0 {
- ; CHECK-NEXT: internal [[COPY66]].sub2:vreg_128 = COPY [[COPY33]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub0:vreg_128 = COPY [[COPY66]].sub0 {
- ; CHECK-NEXT: internal [[COPY67]].sub2:vreg_128 = COPY [[COPY66]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY67]], [[S_MOV_B32_]], 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY68:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 {
- ; CHECK-NEXT: internal [[COPY68]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY69:%[0-9]+]].sub0:vreg_128 = COPY [[COPY68]].sub0 {
- ; CHECK-NEXT: internal [[COPY69]].sub2:vreg_128 = COPY [[COPY68]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY69]], [[S_MOV_B32_]], 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY70:%[0-9]+]].sub0:vreg_128 = COPY [[COPY29]].sub0 {
- ; CHECK-NEXT: internal [[COPY70]].sub2:vreg_128 = COPY [[COPY29]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY71:%[0-9]+]].sub0:vreg_128 = COPY [[COPY70]].sub0 {
- ; CHECK-NEXT: internal [[COPY71]].sub2:vreg_128 = COPY [[COPY70]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY71]], [[S_MOV_B32_]], 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY72:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 {
- ; CHECK-NEXT: internal [[COPY72]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY73:%[0-9]+]].sub0:vreg_128 = COPY [[COPY72]].sub0 {
- ; CHECK-NEXT: internal [[COPY73]].sub2:vreg_128 = COPY [[COPY72]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY73]], [[S_MOV_B32_]], 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY74:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 {
- ; CHECK-NEXT: internal [[COPY74]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY75:%[0-9]+]].sub0:vreg_128 = COPY [[COPY74]].sub0 {
- ; CHECK-NEXT: internal [[COPY75]].sub2:vreg_128 = COPY [[COPY74]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY75]], [[S_MOV_B32_]], 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY76:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 {
- ; CHECK-NEXT: internal [[COPY76]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY77:%[0-9]+]].sub0:vreg_128 = COPY [[COPY76]].sub0 {
- ; CHECK-NEXT: internal [[COPY77]].sub2:vreg_128 = COPY [[COPY76]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY77]], [[S_MOV_B32_]], 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: undef [[COPY78:%[0-9]+]].sub0:vreg_128 = COPY [[COPY21]].sub0 {
- ; CHECK-NEXT: internal [[COPY78]].sub2:vreg_128 = COPY [[COPY21]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY79:%[0-9]+]].sub0:vreg_128 = COPY [[COPY78]].sub0 {
- ; CHECK-NEXT: internal [[COPY79]].sub2:vreg_128 = COPY [[COPY78]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY79]], [[S_MOV_B32_]], 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
- ; CHECK-NEXT: undef [[COPY80:%[0-9]+]].sub0:vreg_128 = COPY [[COPY19]].sub0 {
- ; CHECK-NEXT: internal [[COPY80]].sub2:vreg_128 = COPY [[COPY19]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY81:%[0-9]+]].sub0:vreg_128 = COPY [[COPY80]].sub0 {
- ; CHECK-NEXT: internal [[COPY81]].sub2:vreg_128 = COPY [[COPY80]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY81]], [[S_MOV_B32_]], 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY82:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 {
- ; CHECK-NEXT: internal [[COPY82]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY83:%[0-9]+]].sub0:vreg_128 = COPY [[COPY82]].sub0 {
- ; CHECK-NEXT: internal [[COPY83]].sub2:vreg_128 = COPY [[COPY82]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY83]], [[S_MOV_B32_]], 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY57]], [[S_MOV_B32_]], 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY84:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 {
- ; CHECK-NEXT: internal [[COPY84]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY85:%[0-9]+]].sub0:vreg_128 = COPY [[COPY84]].sub0 {
- ; CHECK-NEXT: internal [[COPY85]].sub2:vreg_128 = COPY [[COPY84]].sub2
+ ; CHECK-NEXT: undef [[COPY58:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 {
+ ; CHECK-NEXT: internal [[COPY58]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY85]], [[S_MOV_B32_]], 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[COPY58:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY58:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY58]], [[S_MOV_B32_]], 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY86:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 {
- ; CHECK-NEXT: internal [[COPY86]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2
+ ; CHECK-NEXT: undef [[COPY59:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 {
+ ; CHECK-NEXT: internal [[COPY59]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY87:%[0-9]+]].sub0:vreg_128 = COPY [[COPY86]].sub0 {
- ; CHECK-NEXT: internal [[COPY87]].sub2:vreg_128 = COPY [[COPY86]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY87]], [[S_MOV_B32_]], 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
+ ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY59]], [[S_MOV_B32_]], 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY88:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 {
- ; CHECK-NEXT: internal [[COPY88]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2
+ ; CHECK-NEXT: undef [[COPY60:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 {
+ ; CHECK-NEXT: internal [[COPY60]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY89:%[0-9]+]].sub0:vreg_128 = COPY [[COPY88]].sub0 {
- ; CHECK-NEXT: internal [[COPY89]].sub2:vreg_128 = COPY [[COPY88]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY89]], [[S_MOV_B32_]], 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[COPY60:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY60:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY60]], [[S_MOV_B32_]], 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY90:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 {
- ; CHECK-NEXT: internal [[COPY90]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY91:%[0-9]+]].sub0:vreg_128 = COPY [[COPY90]].sub0 {
- ; CHECK-NEXT: internal [[COPY91]].sub2:vreg_128 = COPY [[COPY90]].sub2
+ ; CHECK-NEXT: undef [[COPY61:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 {
+ ; CHECK-NEXT: internal [[COPY61]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY91]], [[S_MOV_B32_]], 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
+ ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY61]], [[S_MOV_B32_]], 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY92:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 {
- ; CHECK-NEXT: internal [[COPY92]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY93:%[0-9]+]].sub0:vreg_128 = COPY [[COPY92]].sub0 {
- ; CHECK-NEXT: internal [[COPY93]].sub2:vreg_128 = COPY [[COPY92]].sub2
+ ; CHECK-NEXT: undef [[COPY62:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 {
+ ; CHECK-NEXT: internal [[COPY62]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY93]], [[S_MOV_B32_]], 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[COPY62:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY62:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY62]], [[S_MOV_B32_]], 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY94:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 {
- ; CHECK-NEXT: internal [[COPY94]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY95:%[0-9]+]].sub0:vreg_128 = COPY [[COPY94]].sub0 {
- ; CHECK-NEXT: internal [[COPY95]].sub2:vreg_128 = COPY [[COPY94]].sub2
+ ; CHECK-NEXT: undef [[COPY63:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 {
+ ; CHECK-NEXT: internal [[COPY63]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY95]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
+ ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY63]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY96:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 {
- ; CHECK-NEXT: internal [[COPY96]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2
- ; CHECK-NEXT: }
- ; CHECK-NEXT: undef [[COPY97:%[0-9]+]].sub0:vreg_128 = COPY [[COPY96]].sub0 {
- ; CHECK-NEXT: internal [[COPY97]].sub2:vreg_128 = COPY [[COPY96]].sub2
+ ; CHECK-NEXT: undef [[COPY64:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 {
+ ; CHECK-NEXT: internal [[COPY64]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2
; CHECK-NEXT: }
- ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
- ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY97]], [[S_MOV_B32_]], 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: [[COPY64:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: [[COPY64:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
+ ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY64]], [[S_MOV_B32_]], 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_64(p4) = COPY $sgpr0_sgpr1
%1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll
index 07839a43331f08..8044bf2f6b45e7 100644
--- a/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll
+++ b/llvm/test/CodeGen/AVR/inline-asm/inline-asm3.ll
@@ -231,8 +231,8 @@ define void @add_e_i8(i8 signext %0, i8 signext %1) {
; CHECK-NEXT: mov r26, r26
; CHECK-NEXT: add r26, r30
; CHECK-NEXT: ;NO_APP
-; CHECK-NEXT: mov r20, r30
; CHECK-NEXT: mov r24, r26
+; CHECK-NEXT: mov r20, r30
; CHECK-NEXT: rcall foo8
; CHECK-NEXT: ret
%3 = tail call i8 asm sideeffect "mov $0, $1\0Aadd $0, $2", "=e,e,e"(i8 %0, i8 %1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index a91dee1cb245f9..46c190532052a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -411,8 +411,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: slli a4, a4, 4
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv4r.v v24, v16
+; CHECK-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: slli a4, a4, 3
; CHECK-NEXT: add a4, sp, a4
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 7ca1983e8b32c0..7f65a0d0c50e83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -927,13 +927,12 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfl
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a2
-; CHECK-NEXT: vmv4r.v v8, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a4, a2, 4
; CHECK-NEXT: add a2, a4, a2
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
; CHECK-NEXT: csrr a2, vlenb
@@ -1080,13 +1079,12 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat>
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a2
-; CHECK-NEXT: vmv4r.v v8, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a4, a2, 4
; CHECK-NEXT: add a2, a4, a2
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
; CHECK-NEXT: csrr a2, vlenb
@@ -1221,12 +1219,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat>
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a2
-; CHECK-NEXT: vmv4r.v v8, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
; CHECK-NEXT: addi a2, sp, 16
@@ -1324,12 +1321,11 @@ define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v24, a2
-; CHECK-NEXT: vmv4r.v v8, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
; CHECK-NEXT: addi a2, sp, 16
@@ -2505,13 +2501,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a2
-; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a4, a2, 4
; ZVFHMIN-NEXT: add a2, a4, a2
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a2, vlenb
@@ -2664,13 +2659,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a2
-; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a4, a2, 4
; ZVFHMIN-NEXT: add a2, a4, a2
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a2, vlenb
@@ -2811,12 +2805,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a2
-; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: addi a2, sp, 16
@@ -2920,12 +2913,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a2
-; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: addi a2, sp, 16
@@ -12499,7 +12491,6 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t
; ZVFHMIN-NEXT: slli a2, a1, 1
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: vmv4r.v v20, v28
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: mv a5, a4
@@ -12507,7 +12498,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 85317e1fe4626a..51b52be9a84d05 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -405,31 +405,28 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #48
; CHECK-NEXT: sub sp, #48
-; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
+; CHECK-NEXT: vldrw.u32 q6, [r0]
+; CHECK-NEXT: vldrw.u32 q2, [r0, #64]
; CHECK-NEXT: vldrw.u32 q7, [r0, #80]
-; CHECK-NEXT: vmov.f32 s0, s11
+; CHECK-NEXT: vmov.f32 s0, s27
; CHECK-NEXT: vmov.u16 r2, q1[5]
; CHECK-NEXT: vmov.16 q3[0], r2
; CHECK-NEXT: vins.f16 s0, s7
-; CHECK-NEXT: vmov.f32 s2, s11
-; CHECK-NEXT: vmov.u16 r2, q1[7]
-; CHECK-NEXT: vmov.f64 d12, d4
-; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
-; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT: vmov.f32 s26, s10
-; CHECK-NEXT: vldrw.u32 q2, [r0, #64]
; CHECK-NEXT: vmov.f32 s13, s0
-; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill
+; CHECK-NEXT: vmov.u16 r2, q1[7]
; CHECK-NEXT: vmov.16 q3[6], r2
; CHECK-NEXT: vmovx.f16 s0, s10
; CHECK-NEXT: vins.f16 s12, s0
-; CHECK-NEXT: vmovx.f16 s0, s2
+; CHECK-NEXT: vmovx.f16 s0, s27
; CHECK-NEXT: vmov.f32 s14, s11
+; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vins.f16 s14, s0
-; CHECK-NEXT: vmov.f32 s20, s7
+; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
; CHECK-NEXT: vmov q0, q3
; CHECK-NEXT: vldrw.u32 q3, [r0, #48]
+; CHECK-NEXT: vmov.f32 s20, s7
+; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov.u16 r2, q3[5]
; CHECK-NEXT: vins.f16 s20, s15
; CHECK-NEXT: vmov.16 q4[0], r2
@@ -472,29 +469,27 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
; CHECK-NEXT: vmov.u16 r0, q3[3]
; CHECK-NEXT: vins.f16 s4, s14
; CHECK-NEXT: vmov.16 q0[2], r0
-; CHECK-NEXT: vins.f16 s26, s8
+; CHECK-NEXT: vmov.f32 s18, s31
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmovx.f16 s4, s29
+; CHECK-NEXT: vmovx.f16 s0, s5
; CHECK-NEXT: vins.f16 s1, s4
; CHECK-NEXT: vmovx.f16 s4, s6
-; CHECK-NEXT: vmovx.f16 s0, s5
-; CHECK-NEXT: vins.f16 s30, s4
-; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
; CHECK-NEXT: vins.f16 s29, s0
+; CHECK-NEXT: vins.f16 s30, s4
+; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT: vmov.f32 s25, s28
+; CHECK-NEXT: vins.f16 s26, s8
; CHECK-NEXT: vmov.f32 s0, s29
-; CHECK-NEXT: vins.f16 s22, s11
+; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.f32 s3, s30
-; CHECK-NEXT: vstrw.32 q5, [r1]
-; CHECK-NEXT: vmov.f32 s29, s5
+; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload
+; CHECK-NEXT: vins.f16 s22, s11
+; CHECK-NEXT: vstrw.32 q6, [r1, #48]
+; CHECK-NEXT: vmov.f32 s8, s30
; CHECK-NEXT: vstrw.32 q0, [r1, #64]
-; CHECK-NEXT: vmov.f32 s30, s6
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT: vmov.f32 s18, s31
-; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vins.f16 s8, s6
; CHECK-NEXT: vmov.16 q1[2], r0
-; CHECK-NEXT: vmov.f32 s25, s28
; CHECK-NEXT: vmov.f32 s6, s8
; CHECK-NEXT: vmovx.f16 s8, s9
; CHECK-NEXT: vmovx.f16 s4, s29
@@ -504,10 +499,10 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
; CHECK-NEXT: vins.f16 s10, s8
; CHECK-NEXT: vmov.f32 s4, s9
; CHECK-NEXT: vmov.f32 s7, s10
-; CHECK-NEXT: vstrw.32 q6, [r1, #48]
+; CHECK-NEXT: vstrw.32 q4, [r1, #80]
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
-; CHECK-NEXT: vstrw.32 q4, [r1, #80]
+; CHECK-NEXT: vstrw.32 q5, [r1]
; CHECK-NEXT: vstrw.32 q1, [r1, #32]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
@@ -1043,10 +1038,10 @@ define void @vst3_v8f32(ptr %src, ptr %dst) {
; CHECK-NEXT: vstrw.32 q5, [r1]
; CHECK-NEXT: vmov.f32 s10, s19
; CHECK-NEXT: vmov.f32 s11, s31
-; CHECK-NEXT: vmov.f32 s5, s29
-; CHECK-NEXT: vstrw.32 q2, [r1, #32]
; CHECK-NEXT: vmov.f32 s4, s17
+; CHECK-NEXT: vstrw.32 q2, [r1, #32]
; CHECK-NEXT: vmov.f32 s7, s18
+; CHECK-NEXT: vmov.f32 s5, s29
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
@@ -1332,22 +1327,24 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #48
; CHECK-NEXT: sub sp, #48
-; CHECK-NEXT: vldrw.u32 q3, [r0, #16]
+; CHECK-NEXT: vldrw.u32 q5, [r0, #16]
; CHECK-NEXT: vldrw.u32 q1, [r0, #48]
; CHECK-NEXT: vldrw.u32 q6, [r0, #32]
-; CHECK-NEXT: vmov.f32 s8, s12
+; CHECK-NEXT: vmov.f32 s8, s20
; CHECK-NEXT: vmovx.f16 s2, s4
-; CHECK-NEXT: vmov.f32 s0, s13
+; CHECK-NEXT: vmov.f32 s0, s21
; CHECK-NEXT: vins.f16 s8, s4
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vins.f16 s0, s5
; CHECK-NEXT: vmov.16 q2[4], r2
-; CHECK-NEXT: vmov q4, q3
+; CHECK-NEXT: vmov.f64 d15, d13
; CHECK-NEXT: vmov.f32 s11, s0
-; CHECK-NEXT: vmovx.f16 s0, s16
+; CHECK-NEXT: vmovx.f16 s0, s20
; CHECK-NEXT: vmov.f32 s12, s8
-; CHECK-NEXT: vmov.f64 d11, d9
-; CHECK-NEXT: vmov.f32 s21, s17
+; CHECK-NEXT: vmov.f32 s29, s25
+; CHECK-NEXT: vmov.f32 s4, s23
+; CHECK-NEXT: vstrw.32 q7, [sp] @ 16-byte Spill
+; CHECK-NEXT: vins.f16 s4, s7
; CHECK-NEXT: vmov.f64 d7, d5
; CHECK-NEXT: vldrw.u32 q2, [r0, #80]
; CHECK-NEXT: vmovx.f16 s2, s8
@@ -1364,7 +1361,6 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vins.f16 s0, s25
; CHECK-NEXT: vmov.f32 s19, s0
; CHECK-NEXT: vmovx.f16 s0, s12
-; CHECK-NEXT: vmov.f64 d15, d13
; CHECK-NEXT: vmov.f32 s17, s13
; CHECK-NEXT: vmov.f32 s24, s16
; CHECK-NEXT: vmov.f64 d13, d9
@@ -1378,30 +1374,28 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vstrw.32 q6, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vins.f16 s0, s31
-; CHECK-NEXT: vmov.f32 s29, s25
; CHECK-NEXT: vmov.16 q6[0], r0
+; CHECK-NEXT: vmovx.f16 s2, s15
; CHECK-NEXT: vmov.f32 s25, s0
; CHECK-NEXT: vmovx.f16 s0, s31
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmovx.f16 s0, s14
; CHECK-NEXT: vmov.16 q6[6], r0
-; CHECK-NEXT: vmovx.f16 s2, s15
+; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vins.f16 s24, s0
; CHECK-NEXT: vmovx.f16 s0, s19
; CHECK-NEXT: vins.f16 s15, s0
; CHECK-NEXT: vmovx.f16 s0, s6
-; CHECK-NEXT: vmov.f32 s4, s23
-; CHECK-NEXT: vins.f16 s27, s2
; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: vins.f16 s4, s7
+; CHECK-NEXT: vins.f16 s27, s2
; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: vstrw.32 q7, [sp] @ 16-byte Spill
+; CHECK-NEXT: vmov.f32 s29, s12
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vmovx.f16 s4, s7
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmovx.f16 s4, s10
; CHECK-NEXT: vmov.16 q0[6], r0
-; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT: vmovx.f16 s12, s9
; CHECK-NEXT: vins.f16 s0, s4
; CHECK-NEXT: vmovx.f16 s4, s11
; CHECK-NEXT: vmovx.f16 s2, s23
@@ -1409,21 +1403,20 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vmovx.f16 s4, s5
; CHECK-NEXT: vins.f16 s11, s2
; CHECK-NEXT: vmov.f32 s2, s22
+; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vins.f16 s2, s6
; CHECK-NEXT: vmov.16 q1[2], r0
-; CHECK-NEXT: vmov.f32 s29, s12
+; CHECK-NEXT: vmov.f32 s26, s15
; CHECK-NEXT: vmovx.f16 s4, s21
-; CHECK-NEXT: vmovx.f16 s12, s9
+; CHECK-NEXT: vmov.f32 s21, s17
; CHECK-NEXT: vins.f16 s9, s4
; CHECK-NEXT: vmovx.f16 s4, s22
; CHECK-NEXT: vins.f16 s10, s4
-; CHECK-NEXT: vmov.f32 s21, s17
; CHECK-NEXT: vmov.f32 s22, s18
-; CHECK-NEXT: vins.f16 s5, s12
; CHECK-NEXT: vmov.f32 s4, s18
; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload
-; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT: vins.f16 s5, s12
; CHECK-NEXT: vmov.f32 s6, s2
; CHECK-NEXT: vmovx.f16 s12, s17
; CHECK-NEXT: vins.f16 s4, s18
@@ -1439,17 +1432,16 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vldrw.u32 q5, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r1, #80]
; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT: vmov.f32 s26, s15
-; CHECK-NEXT: vins.f16 s29, s12
; CHECK-NEXT: vmov.f32 s21, s8
-; CHECK-NEXT: vstrw.32 q6, [r1, #32]
+; CHECK-NEXT: vins.f16 s29, s12
; CHECK-NEXT: vmov.f32 s4, s9
-; CHECK-NEXT: vstrw.32 q5, [r1, #48]
+; CHECK-NEXT: vstrw.32 q6, [r1, #32]
; CHECK-NEXT: vmov.f32 s7, s10
-; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: vstrw.32 q5, [r1, #48]
; CHECK-NEXT: vmov.f32 s28, s13
; CHECK-NEXT: vstrw.32 q1, [r1, #64]
; CHECK-NEXT: vmov.f32 s31, s14
+; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vstrw.32 q7, [r1, #16]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index b36904495e878d..013875ab0348ee 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -119,22 +119,20 @@ define void @vst4_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: sub sp, #192
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
; CHECK-NEXT: add r2, sp, #64
-; CHECK-NEXT: vldrw.u32 q4, [r0, #176]
; CHECK-NEXT: vldrw.u32 q3, [r0, #208]
; CHECK-NEXT: vldrw.u32 q2, [r0, #144]
; CHECK-NEXT: vldrw.u32 q1, [r0, #80]
; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q2, [r0, #128]
-; CHECK-NEXT: vldrw.u32 q5, [r0, #240]
-; CHECK-NEXT: vmov q6, q4
; CHECK-NEXT: vldrw.u32 q3, [r0, #192]
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill
+; CHECK-NEXT: vldrw.u32 q6, [r0, #176]
; CHECK-NEXT: vldrw.u32 q2, [r0, #160]
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
; CHECK-NEXT: add r2, sp, #128
-; CHECK-NEXT: vmov q7, q5
+; CHECK-NEXT: vldrw.u32 q7, [r0, #240]
; CHECK-NEXT: vldrw.u32 q3, [r0, #224]
; CHECK-NEXT: vldrw.u32 q1, [r0, #96]
; CHECK-NEXT: vldrw.u32 q5, [r0, #112]
@@ -885,22 +883,20 @@ define void @vst4_v16f32(ptr %src, ptr %dst) {
; CHECK-NEXT: sub sp, #192
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
; CHECK-NEXT: add r2, sp, #64
-; CHECK-NEXT: vldrw.u32 q4, [r0, #176]
; CHECK-NEXT: vldrw.u32 q3, [r0, #208]
; CHECK-NEXT: vldrw.u32 q2, [r0, #144]
; CHECK-NEXT: vldrw.u32 q1, [r0, #80]
; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q2, [r0, #128]
-; CHECK-NEXT: vldrw.u32 q5, [r0, #240]
-; CHECK-NEXT: vmov q6, q4
; CHECK-NEXT: vldrw.u32 q3, [r0, #192]
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill
+; CHECK-NEXT: vldrw.u32 q6, [r0, #176]
; CHECK-NEXT: vldrw.u32 q2, [r0, #160]
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
; CHECK-NEXT: add r2, sp, #128
-; CHECK-NEXT: vmov q7, q5
+; CHECK-NEXT: vldrw.u32 q7, [r0, #240]
; CHECK-NEXT: vldrw.u32 q3, [r0, #224]
; CHECK-NEXT: vldrw.u32 q1, [r0, #96]
; CHECK-NEXT: vldrw.u32 q5, [r0, #112]
More information about the llvm-commits
mailing list