[llvm] 6f6d389 - [SplitKit] Only copy live lanes

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 17 01:26:41 PDT 2020


Author: Jay Foad
Date: 2020-09-17T09:26:11+01:00
New Revision: 6f6d389da5c37e5e9a900902f03dc649d57919b7

URL: https://github.com/llvm/llvm-project/commit/6f6d389da5c37e5e9a900902f03dc649d57919b7
DIFF: https://github.com/llvm/llvm-project/commit/6f6d389da5c37e5e9a900902f03dc649d57919b7.diff

LOG: [SplitKit] Only copy live lanes

When splitting a live interval with subranges, only insert copies for
the lanes that are live at the point of the split. This avoids some
unnecessary copies and fixes a problem where copying dead lanes was
generating MIR that failed verification. The test case for this is
test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir.

Without this fix, some earlier live range splitting would create %430:

%430 [256r,848r:0)[848r,2584r:1)  0@256r 1@848r L0000000000000003 [848r,2584r:0)  0@848r L0000000000000030 [256r,2584r:0)  0@256r weight:1.480938e-03
...
256B     undef %430.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
848B     %430.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B    %431:vreg_128 = COPY %430:vreg_128

Then RAGreedy::tryLocalSplit would split %430 into %432 and %433 just
before 848B giving:

%432 [256r,844r:0)  0@256r L0000000000000030 [256r,844r:0)  0@256r weight:3.066802e-03
%433 [844r,848r:0)[848r,2584r:1)  0@844r 1@848r L0000000000000030 [844r,2584r:0)  0@844r L0000000000000003 [844r,844d:0)[848r,2584r:1)  0@844r 1@848r weight:2.831776e-03
...
256B     undef %432.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
844B     undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 {
           internal %433.sub2:vreg_128 = COPY %432.sub2:vreg_128
848B     }
  %433.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B    %431:vreg_128 = COPY %433:vreg_128

Note that the copy from %432 to %433 at 844B is a curious
bundle-without-a-BUNDLE-instruction that SplitKit creates deliberately.
It includes a copy of .sub0, which is not live at this point, and that
dead-lane copy is what makes the MIR fail verification:

*** Bad machine code: No live subrange at use ***
- function:    zextload_global_v64i16_to_v64i64
- basic block: %bb.0  (0x7faed48) [0B;2848B)
- instruction: 844B    undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128
- operand 1:   %432.sub0:vreg_128
- interval:    %432 [256r,844r:0)  0@256r L0000000000000030 [256r,844r:0)  0@256r weight:3.066802e-03
- at:          844B

Using real bundles with a BUNDLE instruction might also fix this
problem, but the current fix is less invasive and also avoids some
unnecessary copies.
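
For reference, the whole change is confined to SplitEditor::defFromParent
in SplitKit.cpp. Here is a condensed sketch of the new lane-mask
computation (OrigLI, LI and UseIdx are existing locals of that function;
see the full hunk below for context):

    LaneBitmask LaneMask;
    if (OrigLI.hasSubRanges()) {
      // The old code walked LI->subranges() and accumulated every lane.
      // Now only lanes whose subrange in the original interval is live
      // at the insertion point are accumulated, so dead lanes get no COPY.
      LaneMask = LaneBitmask::getNone();
      for (LiveInterval::SubRange &S : OrigLI.subranges()) {
        if (S.liveAt(UseIdx))
          LaneMask |= S.LaneMask;
      }
      assert(LaneMask.any() && "Interval has no live subranges");
    } else {
      // No subrange information: conservatively copy all lanes.
      LaneMask = LaneBitmask::getAll();
    }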

https://bugs.llvm.org/show_bug.cgi?id=47492

Differential Revision: https://reviews.llvm.org/D87757

Added: 
    llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir

Modified: 
    llvm/lib/CodeGen/SplitKit.cpp
    llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
    llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
    llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 372c7f806129..4029c855c910 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -649,10 +649,13 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
   }
   if (!DidRemat) {
     LaneBitmask LaneMask;
-    if (LI->hasSubRanges()) {
+    if (OrigLI.hasSubRanges()) {
       LaneMask = LaneBitmask::getNone();
-      for (LiveInterval::SubRange &S : LI->subranges())
-        LaneMask |= S.LaneMask;
+      for (LiveInterval::SubRange &S : OrigLI.subranges()) {
+        if (S.liveAt(UseIdx))
+          LaneMask |= S.LaneMask;
+      }
+      assert(LaneMask.any() && "Interval has no live subranges");
     } else {
       LaneMask = LaneBitmask::getAll();
     }

diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index d2434682eebc..5695487d58d8 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -39,7 +39,7 @@ entry:
 ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32
 ; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9]+]]
 ; GFX6: NumSgprs: 48
-; GFX6: ScratchSize: 8624
+; GFX6: ScratchSize: 8608
 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)

diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
index c02b9a001fbb..c9f3a82cf695 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -16,17 +16,11 @@ body:             |
   ; RA:   [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; RA:   undef %5.sub1:sgpr_1024 = S_MOV_B32 -1
   ; RA:   %5.sub0:sgpr_1024 = S_MOV_B32 -1
-  ; RA:   undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
-  ; RA:     internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
-  ; RA:     internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29
-  ; RA:   }
+  ; RA:   undef %4.sub0_sub1:sgpr_1024 = COPY %5.sub0_sub1
   ; RA:   undef %3.sub0:sgpr_1024 = S_MOV_B32 0
   ; RA: bb.1:
   ; RA:   successors: %bb.2(0x80000000)
-  ; RA:   undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
-  ; RA:     internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
-  ; RA:     internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29
-  ; RA:   }
+  ; RA:   undef %6.sub0_sub1:sgpr_1024 = COPY %4.sub0_sub1
   ; RA:   %6.sub2:sgpr_1024 = COPY %6.sub0
   ; RA:   %6.sub3:sgpr_1024 = COPY %6.sub1
   ; RA:   %6.sub4:sgpr_1024 = COPY %6.sub0
@@ -55,10 +49,7 @@ body:             |
   ; RA:   %6.sub27:sgpr_1024 = COPY %6.sub1
   ; RA:   %6.sub28:sgpr_1024 = COPY %6.sub0
   ; RA:   %6.sub29:sgpr_1024 = COPY %6.sub1
-  ; RA:   undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
-  ; RA:     internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
-  ; RA:     internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29
-  ; RA:   }
+  ; RA:   undef %4.sub0_sub1:sgpr_1024 = COPY %6.sub0_sub1
   ; RA:   %3.sub1:sgpr_1024 = COPY %3.sub0
   ; RA:   %3.sub2:sgpr_1024 = COPY %3.sub0
   ; RA:   %3.sub3:sgpr_1024 = COPY %3.sub0
@@ -102,40 +93,40 @@ body:             |
   ; VR:   renamable $sgpr68 = S_MOV_B32 -1
   ; VR:   renamable $sgpr36 = S_MOV_B32 0
   ; VR:   renamable $sgpr34_sgpr35 = IMPLICIT_DEF
-  ; VR:   renamable $sgpr98_sgpr99 = IMPLICIT_DEF
-  ; VR:   renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-  ; VR:   renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97
+  ; VR:   renamable $sgpr70_sgpr71 = IMPLICIT_DEF
   ; VR: bb.1:
   ; VR:   successors: %bb.2(0x80000000)
-  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99
-  ; VR:   renamable $sgpr70 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr71 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr72 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr73 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr74 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr75 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr76 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr77 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr78 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr79 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr80 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr81 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr82 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr83 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr84 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr85 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr86 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr87 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr88 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr89 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr90 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr91 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr92 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr93 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr94 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr95 = COPY renamable $sgpr69
-  ; VR:   renamable $sgpr96 = COPY renamable $sgpr68
-  ; VR:   renamable $sgpr97 = COPY renamable $sgpr69
+  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
+  ; VR:   renamable $sgpr40_sgpr41 = COPY killed renamable $sgpr68_sgpr69
+  ; VR:   renamable $sgpr42 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr43 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr44 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr45 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr46 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr47 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr48 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr49 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr50 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr51 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr52 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr53 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr54 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr55 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr56 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr57 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr58 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr59 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr60 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr61 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr62 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr63 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr64 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr65 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr66 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr67 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr68 = COPY renamable $sgpr40
+  ; VR:   renamable $sgpr69 = COPY renamable $sgpr41
+  ; VR:   renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr40_sgpr41
   ; VR:   renamable $sgpr37 = COPY renamable $sgpr36
   ; VR:   renamable $sgpr38 = COPY renamable $sgpr36
   ; VR:   renamable $sgpr39 = COPY renamable $sgpr36
@@ -169,8 +160,8 @@ body:             |
   ; VR:   renamable $sgpr67 = COPY renamable $sgpr36
   ; VR: bb.2:
   ; VR:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0FFFFFFFFFFFFFFF, $sgpr34_sgpr35, $sgpr98_sgpr99
-  ; VR:   S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr98_sgpr99
+  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x0000000000000003, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x000000000000000F, $sgpr34_sgpr35, $sgpr70_sgpr71
+  ; VR:   S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr70_sgpr71
   ; VR:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
   ; VR:   S_BRANCH %bb.2
   bb.0:

diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
new file mode 100644
index 000000000000..56ebf9305dbd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
@@ -0,0 +1,525 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -verify-regalloc -run-pass=greedy %s -o - | FileCheck %s
+
+---
+name: zextload_global_v64i16_to_v64i64
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+    ; CHECK: undef %2.sub3:sgpr_128 = S_MOV_B32 61440
+    ; CHECK: %2.sub2:sgpr_128 = S_MOV_B32 -1
+    ; CHECK: %2.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+    ; CHECK: %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
+    ; CHECK: undef %3.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+    ; CHECK: %3.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub3
+    ; CHECK: %3.sub2:sgpr_128 = COPY %2.sub2
+    ; CHECK: %3.sub3:sgpr_128 = COPY %2.sub3
+    ; CHECK: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec {
+    ; CHECK:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
+    ; CHECK:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    ; CHECK:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+    ; CHECK:   [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    ; CHECK: }
+    ; CHECK: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
+    ; CHECK: undef %52.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %52, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5)
+    ; CHECK: undef %57.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %57, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5)
+    ; CHECK: undef %62.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %62, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5)
+    ; CHECK: undef %67.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec
+    ; CHECK: undef %71.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %71, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5)
+    ; CHECK: undef %76.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %76, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5)
+    ; CHECK: undef %81.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %81, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5)
+    ; CHECK: undef %86.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec
+    ; CHECK: undef %90.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %90, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5)
+    ; CHECK: undef %95.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %95, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5)
+    ; CHECK: undef %100.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %100, %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5)
+    ; CHECK: undef %105.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
+    ; CHECK: undef %109.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
+    ; CHECK: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
+    ; CHECK: undef %117.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5)
+    ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
+    ; CHECK: undef %122.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
+    ; CHECK: undef %126.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
+    ; CHECK: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5)
+    ; CHECK: undef %135.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5)
+    ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    ; CHECK: undef %140.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
+    ; CHECK: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5)
+    ; CHECK: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5)
+    ; CHECK: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
+    ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+    ; CHECK: undef %158.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
+    ; CHECK: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
+    ; CHECK: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
+    ; CHECK: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
+    ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    ; CHECK: undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec
+    ; CHECK: undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec
+    ; CHECK: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec
+    ; CHECK: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+    ; CHECK: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE]], %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE1]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE1]], %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE2]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE2]], %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE3]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE3]], %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5)
+    ; CHECK: undef %68.sub2:vreg_128 = COPY %67.sub2
+    ; CHECK: %68.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec
+    ; CHECK: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE4]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE4]], %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE5]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE5]], %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE6]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE6]], %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5)
+    ; CHECK: undef %87.sub2:vreg_128 = COPY %86.sub2
+    ; CHECK: %87.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec
+    ; CHECK: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE7]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE7]], %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE8]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE8]], %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE9]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE9]], %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5)
+    ; CHECK: undef %106.sub2:vreg_128 = COPY %105.sub2
+    ; CHECK: %106.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
+    ; CHECK: undef %110.sub2:vreg_128 = COPY %109.sub2
+    ; CHECK: %110.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
+    ; CHECK: undef %114.sub2:vreg_128 = COPY %113.sub2
+    ; CHECK: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
+    ; CHECK: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE10]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5)
+    ; CHECK: undef %123.sub2:vreg_128 = COPY %122.sub2
+    ; CHECK: %123.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
+    ; CHECK: undef %127.sub2:vreg_128 = COPY %126.sub2
+    ; CHECK: %127.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
+    ; CHECK: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5)
+    ; CHECK: undef %141.sub2:vreg_128 = COPY %140.sub2
+    ; CHECK: %141.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
+    ; CHECK: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5)
+    ; CHECK: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
+    ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5)
+    ; CHECK: undef %155.sub2:vreg_128 = COPY %154.sub2
+    ; CHECK: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
+    ; CHECK: undef %159.sub2:vreg_128 = COPY %158.sub2
+    ; CHECK: %159.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
+    ; CHECK: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
+    ; CHECK: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
+    ; CHECK: %38.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
+    ; CHECK: %40.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec
+    ; CHECK: %41.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec
+    ; CHECK: %42.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec
+    ; CHECK: %43.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec
+    ; CHECK: %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: %43.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: %42.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %42.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: %41.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %41.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    ; CHECK: %40.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %40.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: %38.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %38.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: %37.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %37.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: %36.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %36.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+    ; CHECK: undef %157.sub0:vreg_128 = COPY %159.sub0 {
+    ; CHECK:   internal %157.sub2:vreg_128 = COPY %159.sub2
+    ; CHECK: }
+    ; CHECK: %157.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %157.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: undef %153.sub0:vreg_128 = COPY %155.sub0 {
+    ; CHECK:   internal %153.sub2:vreg_128 = COPY %155.sub2
+    ; CHECK: }
+    ; CHECK: %153.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %153.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5)
+    ; CHECK: undef %148.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 {
+    ; CHECK:   internal %148.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2
+    ; CHECK: }
+    ; CHECK: %148.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %148.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5)
+    ; CHECK: undef %143.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 {
+    ; CHECK:   internal %143.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2
+    ; CHECK: }
+    ; CHECK: %143.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %143.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    ; CHECK: undef %139.sub0:vreg_128 = COPY %141.sub0 {
+    ; CHECK:   internal %139.sub2:vreg_128 = COPY %141.sub2
+    ; CHECK: }
+    ; CHECK: %139.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %139.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5)
+    ; CHECK: undef %134.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 {
+    ; CHECK:   internal %134.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2
+    ; CHECK: }
+    ; CHECK: %134.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %134.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5)
+    ; CHECK: undef %129.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 {
+    ; CHECK:   internal %129.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2
+    ; CHECK: }
+    ; CHECK: %129.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %129.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: undef %125.sub0:vreg_128 = COPY %127.sub0 {
+    ; CHECK:   internal %125.sub2:vreg_128 = COPY %127.sub2
+    ; CHECK: }
+    ; CHECK: %125.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %125.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
+    ; CHECK: undef %121.sub0:vreg_128 = COPY %123.sub0 {
+    ; CHECK:   internal %121.sub2:vreg_128 = COPY %123.sub2
+    ; CHECK: }
+    ; CHECK: %121.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %121.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5)
+    ; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 {
+    ; CHECK:   internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2
+    ; CHECK: }
+    ; CHECK: %116.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %116.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: undef %112.sub0:vreg_128 = COPY %114.sub0 {
+    ; CHECK:   internal %112.sub2:vreg_128 = COPY %114.sub2
+    ; CHECK: }
+    ; CHECK: %112.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %112.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: undef %108.sub0:vreg_128 = COPY %110.sub0 {
+    ; CHECK:   internal %108.sub2:vreg_128 = COPY %110.sub2
+    ; CHECK: }
+    ; CHECK: %108.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %108.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    ; CHECK: undef %104.sub0:vreg_128 = COPY %106.sub0 {
+    ; CHECK:   internal %104.sub2:vreg_128 = COPY %106.sub2
+    ; CHECK: }
+    ; CHECK: %104.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %104.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5)
+    ; CHECK: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 {
+    ; CHECK:   internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2
+    ; CHECK: }
+    ; CHECK: %99.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %99.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5)
+    ; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 {
+    ; CHECK:   internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2
+    ; CHECK: }
+    ; CHECK: %94.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %94.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5)
+    ; CHECK: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 {
+    ; CHECK:   internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2
+    ; CHECK: }
+    ; CHECK: %89.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %89.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+    ; CHECK: undef %85.sub0:vreg_128 = COPY %87.sub0 {
+    ; CHECK:   internal %85.sub2:vreg_128 = COPY %87.sub2
+    ; CHECK: }
+    ; CHECK: %85.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %85.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5)
+    ; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 {
+    ; CHECK:   internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2
+    ; CHECK: }
+    ; CHECK: %80.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %80.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5)
+    ; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 {
+    ; CHECK:   internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2
+    ; CHECK: }
+    ; CHECK: %75.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %75.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5)
+    ; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 {
+    ; CHECK:   internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2
+    ; CHECK: }
+    ; CHECK: %70.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %70.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    ; CHECK: undef %66.sub0:vreg_128 = COPY %68.sub0 {
+    ; CHECK:   internal %66.sub2:vreg_128 = COPY %68.sub2
+    ; CHECK: }
+    ; CHECK: %66.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %66.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5)
+    ; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 {
+    ; CHECK:   internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2
+    ; CHECK: }
+    ; CHECK: %61.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %61.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5)
+    ; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 {
+    ; CHECK:   internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2
+    ; CHECK: }
+    ; CHECK: %56.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %56.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5)
+    ; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 {
+    ; CHECK:   internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2
+    ; CHECK: }
+    ; CHECK: %51.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %51.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
+    ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
+    ; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 {
+    ; CHECK:   internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2
+    ; CHECK: }
+    ; CHECK: %46.sub1:vreg_128 = COPY %43.sub1
+    ; CHECK: %46.sub3:vreg_128 = COPY %43.sub1
+    ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; CHECK: S_ENDPGM 0
+    %0:sgpr_64(p4) = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
+    undef %2.sub3:sgpr_128 = S_MOV_B32 61440
+    %2.sub2:sgpr_128 = S_MOV_B32 -1
+    %2.sub0:sgpr_128 = COPY %1.sub0
+    %2.sub1:sgpr_128 = COPY %1.sub1
+    undef %3.sub0:sgpr_128 = COPY %1.sub2
+    %3.sub1:sgpr_128 = COPY %1.sub3
+    %3.sub2:sgpr_128 = COPY %2.sub2
+    %3.sub3:sgpr_128 = COPY %2.sub3
+    early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec {
+      %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1)
+      %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+      %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+      %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    }
+    undef %8.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub1, implicit $exec
+    undef %9.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub0, implicit $exec
+    undef %10.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub3, implicit $exec
+    undef %11.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub2, implicit $exec
+    undef %12.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub1, implicit $exec
+    undef %13.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub0, implicit $exec
+    undef %14.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub3, implicit $exec
+    undef %15.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub2, implicit $exec
+    undef %16.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub1, implicit $exec
+    undef %17.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub0, implicit $exec
+    undef %18.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub3, implicit $exec
+    undef %19.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub2, implicit $exec
+    undef %20.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub1, implicit $exec
+    undef %21.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub0, implicit $exec
+    undef %22.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub3, implicit $exec
+    undef %23.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub2, implicit $exec
+    %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1)
+    undef %25.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub1, implicit $exec
+    undef %26.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub0, implicit $exec
+    undef %27.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub3, implicit $exec
+    undef %28.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub2, implicit $exec
+    %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    undef %30.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub1, implicit $exec
+    undef %31.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub0, implicit $exec
+    undef %32.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub3, implicit $exec
+    undef %33.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub2, implicit $exec
+    %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1)
+    undef %35.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub1, implicit $exec
+    undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub0, implicit $exec
+    undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub3, implicit $exec
+    undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub2, implicit $exec
+    %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+    undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub1, implicit $exec
+    undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub0, implicit $exec
+    undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub3, implicit $exec
+    undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub2, implicit $exec
+    %44:sreg_32 = S_MOV_B32 65535
+    %8.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub1, implicit $exec
+    %9.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub0, implicit $exec
+    %10.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub3, implicit $exec
+    %11.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub2, implicit $exec
+    %12.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub1, implicit $exec
+    %13.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub0, implicit $exec
+    %14.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub3, implicit $exec
+    %15.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub2, implicit $exec
+    %16.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub1, implicit $exec
+    %17.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub0, implicit $exec
+    %18.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub3, implicit $exec
+    %19.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub2, implicit $exec
+    %20.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub1, implicit $exec
+    %21.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub0, implicit $exec
+    %22.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub3, implicit $exec
+    %23.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub2, implicit $exec
+    %25.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub1, implicit $exec
+    %26.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub0, implicit $exec
+    %27.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub3, implicit $exec
+    %28.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub2, implicit $exec
+    %30.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub1, implicit $exec
+    %31.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub0, implicit $exec
+    %32.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub3, implicit $exec
+    %33.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub2, implicit $exec
+    %35.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub1, implicit $exec
+    %36.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub0, implicit $exec
+    %37.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub3, implicit $exec
+    %38.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub2, implicit $exec
+    %40.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub1, implicit $exec
+    %41.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub0, implicit $exec
+    %42.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub3, implicit $exec
+    %43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec
+    %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+    %43.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %42.sub1:vreg_128 = COPY %43.sub1
+    %42.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %41.sub1:vreg_128 = COPY %43.sub1
+    %41.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    %40.sub1:vreg_128 = COPY %43.sub1
+    %40.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %38.sub1:vreg_128 = COPY %43.sub1
+    %38.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %37.sub1:vreg_128 = COPY %43.sub1
+    %37.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %36.sub1:vreg_128 = COPY %43.sub1
+    %36.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+    %35.sub1:vreg_128 = COPY %43.sub1
+    %35.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %33.sub1:vreg_128 = COPY %43.sub1
+    %33.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %32.sub1:vreg_128 = COPY %43.sub1
+    %32.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %31.sub1:vreg_128 = COPY %43.sub1
+    %31.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    %30.sub1:vreg_128 = COPY %43.sub1
+    %30.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %28.sub1:vreg_128 = COPY %43.sub1
+    %28.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %27.sub1:vreg_128 = COPY %43.sub1
+    %27.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %26.sub1:vreg_128 = COPY %43.sub1
+    %26.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1)
+    %25.sub1:vreg_128 = COPY %43.sub1
+    %25.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %23.sub1:vreg_128 = COPY %43.sub1
+    %23.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %22.sub1:vreg_128 = COPY %43.sub1
+    %22.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %21.sub1:vreg_128 = COPY %43.sub1
+    %21.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    %20.sub1:vreg_128 = COPY %43.sub1
+    %20.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %19.sub1:vreg_128 = COPY %43.sub1
+    %19.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %18.sub1:vreg_128 = COPY %43.sub1
+    %18.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %17.sub1:vreg_128 = COPY %43.sub1
+    %17.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1)
+    %16.sub1:vreg_128 = COPY %43.sub1
+    %16.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %15.sub1:vreg_128 = COPY %43.sub1
+    %15.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %14.sub1:vreg_128 = COPY %43.sub1
+    %14.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %13.sub1:vreg_128 = COPY %43.sub1
+    %13.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1)
+    %12.sub1:vreg_128 = COPY %43.sub1
+    %12.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %11.sub1:vreg_128 = COPY %43.sub1
+    %11.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1)
+    %10.sub1:vreg_128 = COPY %43.sub1
+    %10.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %9.sub1:vreg_128 = COPY %43.sub1
+    %9.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1)
+    %8.sub1:vreg_128 = COPY %43.sub1
+    %8.sub3:vreg_128 = COPY %43.sub1
+    BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    S_ENDPGM 0
+...

diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index 0fa0ddab4e11..6759cd1040f8 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -110,7 +110,7 @@ body: |
     ; and inserting a spill. Here we just check that the point where the error
     ; occurs we see a correctly generated spill.
     ; GCN-LABEL: bb.7:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -126,7 +126,7 @@ body: |
     successors: %bb.12(0x80000000)
 
     ; GCN-LABEL: bb.9:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -137,7 +137,7 @@ body: |
     successors: %bb.12(0x80000000)
 
     ; GCN-LABEL: bb.10:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0


        

