[llvm] 3279e44 - [AMDGPU] Extend SILoadStoreOptimizer to handle global stores

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 24 11:10:06 PST 2022


Author: Stanislav Mekhanoshin
Date: 2022-02-24T11:09:51-08:00
New Revision: 3279e44063b26e28cb08de07694f4bea4ada6b2c

URL: https://github.com/llvm/llvm-project/commit/3279e44063b26e28cb08de07694f4bea4ada6b2c
DIFF: https://github.com/llvm/llvm-project/commit/3279e44063b26e28cb08de07694f4bea4ada6b2c.diff

LOG: [AMDGPU] Extend SILoadStoreOptimizer to handle global stores

TODO: merge flat load/stores.
TODO: merge flat with global promoting to flat.

Differential Revision: https://reviews.llvm.org/D120346

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
    llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll
    llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
    llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index a38edaf5c4d92..588a55b70e978 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -80,7 +80,9 @@ enum InstClassEnum {
   TBUFFER_LOAD,
   TBUFFER_STORE,
   GLOBAL_LOAD,
-  GLOBAL_LOAD_SADDR
+  GLOBAL_LOAD_SADDR,
+  GLOBAL_STORE,
+  GLOBAL_STORE_SADDR
 };
 
 struct AddressRegs {
@@ -244,6 +246,9 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
   MachineBasicBlock::iterator
   mergeGlobalLoadPair(CombineInfo &CI, CombineInfo &Paired,
                       MachineBasicBlock::iterator InsertBefore);
+  MachineBasicBlock::iterator
+  mergeGlobalStorePair(CombineInfo &CI, CombineInfo &Paired,
+                       MachineBasicBlock::iterator InsertBefore);
 
   void updateBaseAndOffset(MachineInstr &I, Register NewBase,
                            int32_t NewOffset) const;
@@ -316,17 +321,25 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
   case AMDGPU::GLOBAL_LOAD_DWORD:
   case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORD:
+  case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
     return 1;
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::GLOBAL_LOAD_DWORDX2:
   case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX2:
+  case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
     return 2;
   case AMDGPU::GLOBAL_LOAD_DWORDX3:
   case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX3:
+  case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
     return 3;
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::GLOBAL_LOAD_DWORDX4:
   case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX4:
+  case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
     return 8;
@@ -421,6 +434,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
     return GLOBAL_LOAD_SADDR;
+  case AMDGPU::GLOBAL_STORE_DWORD:
+  case AMDGPU::GLOBAL_STORE_DWORDX2:
+  case AMDGPU::GLOBAL_STORE_DWORDX3:
+  case AMDGPU::GLOBAL_STORE_DWORDX4:
+    return GLOBAL_STORE;
+  case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+    return GLOBAL_STORE_SADDR;
   }
 }
 
@@ -464,6 +487,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
     return AMDGPU::GLOBAL_LOAD_DWORD_SADDR;
+  case AMDGPU::GLOBAL_STORE_DWORD:
+  case AMDGPU::GLOBAL_STORE_DWORDX2:
+  case AMDGPU::GLOBAL_STORE_DWORDX3:
+  case AMDGPU::GLOBAL_STORE_DWORDX4:
+    return AMDGPU::GLOBAL_STORE_DWORD;
+  case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+    return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
   }
 }
 
@@ -530,12 +563,20 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
   case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+  case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     Result.SAddr = true;
     LLVM_FALLTHROUGH;
   case AMDGPU::GLOBAL_LOAD_DWORD:
   case AMDGPU::GLOBAL_LOAD_DWORDX2:
   case AMDGPU::GLOBAL_LOAD_DWORDX3:
   case AMDGPU::GLOBAL_LOAD_DWORDX4:
+  case AMDGPU::GLOBAL_STORE_DWORD:
+  case AMDGPU::GLOBAL_STORE_DWORDX2:
+  case AMDGPU::GLOBAL_STORE_DWORDX3:
+  case AMDGPU::GLOBAL_STORE_DWORDX4:
     Result.VAddr = true;
     return Result;
   }
@@ -1451,6 +1492,48 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
   return New;
 }
 
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalStorePair(
+    CombineInfo &CI, CombineInfo &Paired,
+    MachineBasicBlock::iterator InsertBefore) {
+  MachineBasicBlock *MBB = CI.I->getParent();
+  DebugLoc DL = CI.I->getDebugLoc();
+
+  const unsigned Opcode = getNewOpcode(CI, Paired);
+
+  std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
+  const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+  const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+  // Copy to the new source register.
+  const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+  Register SrcReg = MRI->createVirtualRegister(SuperRC);
+
+  const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+  const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
+
+  BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+      .add(*Src0)
+      .addImm(SubRegIdx0)
+      .add(*Src1)
+      .addImm(SubRegIdx1);
+
+  auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
+                 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
+                 .addReg(SrcReg, RegState::Kill);
+
+  if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
+    MIB.add(*SAddr);
+
+  MachineInstr *New =
+    MIB.addImm(std::min(CI.Offset, Paired.Offset))
+       .addImm(CI.CPol)
+       .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
+
+  CI.I->eraseFromParent();
+  Paired.I->eraseFromParent();
+  return New;
+}
+
 unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
                                             const CombineInfo &Paired) {
   const unsigned Width = CI.Width + Paired.Width;
@@ -1501,6 +1584,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
     case 4:
       return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR;
     }
+  case GLOBAL_STORE:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::GLOBAL_STORE_DWORDX2;
+    case 3:
+      return AMDGPU::GLOBAL_STORE_DWORDX3;
+    case 4:
+      return AMDGPU::GLOBAL_STORE_DWORDX4;
+    }
+  case GLOBAL_STORE_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::GLOBAL_STORE_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::GLOBAL_STORE_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::GLOBAL_STORE_DWORDX4_SADDR;
+    }
   case MIMG:
     assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
            "No overlaps");
@@ -2140,6 +2245,11 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
       NewMI = mergeGlobalLoadPair(CI, Paired, Where->I);
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
+    case GLOBAL_STORE:
+    case GLOBAL_STORE_SADDR:
+      NewMI = mergeGlobalStorePair(CI, Paired, Where->I);
+      OptimizeListAgain |= CI.Width + Paired.Width < 4;
+      break;
     }
     CI.setMI(NewMI, *this);
     CI.Order = Where->Order;

diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll b/llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll
index 763829094c862..cbc5963464222 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll
@@ -2,8 +2,7 @@
 
 ; GCN-LABEL: {{^}}cast_constant_i64_to_build_vector_v4i16:
 ; GCN: global_store_short
-; GCN: global_store_dword v
-; GCN: global_store_dwordx2
+; GCN: global_store_dwordx3
 define amdgpu_kernel void @cast_constant_i64_to_build_vector_v4i16(i8 addrspace(1)* nocapture %data) {
 entry:
   store i8 72, i8 addrspace(1)* %data, align 1

diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
index bb2d4e4a0ad83..1d64ae5d20cc0 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir
@@ -398,7 +398,6 @@ body:             |
     S_NOP 0, implicit %2, implicit %3
 ...
 
----
 name:            merge_global_load_dword_2_out_of_order
 body:             |
   bb.0.entry:
@@ -434,3 +433,422 @@ body:             |
     %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, align 8, addrspace 1)
     S_NOP 0, implicit %1, implicit %2
 ...
+
+---
+
+name:            merge_global_store_dword_2
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_2
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_3
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_3
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec :: (store (s96) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 4, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, killed %2, 8, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %3, 12, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_4
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_4
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_128 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, %1.sub1, 8, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %1.sub2, 12, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %1.sub3, 16, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, %1.sub0, 4, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_5
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_5
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:agpr_32 = IMPLICIT_DEF
+    %2:agpr_32 = IMPLICIT_DEF
+    %3:agpr_32 = IMPLICIT_DEF
+    %4:agpr_32 = IMPLICIT_DEF
+    %5:agpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, %1, 4, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %2, 8, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 8, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %3, 12, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %4, 16, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %5, 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_6
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_6
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 8, addrspace 1)
+    ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    %5:vgpr_32 = IMPLICIT_DEF
+    %6:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, %1, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 8, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %2, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %3, 12, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %4, 16, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %5, 20, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0, %6, 24, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dwordx2
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dwordx2
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `i64 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vreg_64_align2 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORDX2 %0, killed %1, 4, 0, implicit $exec :: (store (s64) into `i64 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORDX2 %0, killed %2, 12, 0, implicit $exec :: (store (s64) into `i64 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dwordx3_with_dwordx1
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dwordx3_with_dwordx1
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `i64 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_96_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORDX3 %0, killed %1, 4, 0, implicit $exec :: (store (s96) into `i64 addrspace(1)* undef`, align 16, addrspace 1)
+    GLOBAL_STORE_DWORD %0, killed %2, 16, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_agpr_with_vgpr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_agpr_with_vgpr
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:agpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_disjoint
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_disjoint
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 6, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %2, 6, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_overlap
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_overlap
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 2, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 2, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %2, 2, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 2, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_different_cpol
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_different_cpol
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0, killed %1, 0, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_different_vaddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_different_vaddr
+    ; GCN: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]].sub0_sub1, killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]].sub2_sub3, killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:vreg_128_align2 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD %0.sub0_sub1, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %0.sub2_sub3, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_saddr_2
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_saddr_2
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_saddr_3
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_saddr_3
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_saddr_4
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_saddr_4
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    %5:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %5, %0, 16, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_global_store_dword_saddr_6
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_global_store_dword_saddr_6
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1
+    ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    %5:vgpr_32 = IMPLICIT_DEF
+    %6:vgpr_32 = IMPLICIT_DEF
+    %7:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %5, %0, 16, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %7, %0, 24, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_saddr_with_global_store_dword
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_saddr_with_global_store_dword
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF1]], [[DEF3]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD %1, %3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_saddr_different_vaddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_vaddr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub1, [[DEF3]], [[DEF]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1.sub1, %3, %0, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...
+
+---
+name:            no_merge_global_store_dword_saddr_different_saddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_saddr
+    ; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[DEF2]], [[DEF]].sub0_sub1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[DEF3]], [[DEF]].sub2_sub3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+    %0:sgpr_128 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    GLOBAL_STORE_DWORD_SADDR %1, %2, %0.sub0_sub1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+    GLOBAL_STORE_DWORD_SADDR %1, %3, %0.sub2_sub3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1)
+...

diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
index 52666fd9e5f02..501da3ff09875 100644
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
@@ -3,6 +3,9 @@
 define protected amdgpu_kernel void @excess_soft_clause_reg_pressure(float addrspace(4)* %wei_ptr, float addrspace(1)* %out_ptr, float addrspace(1)* %in) {
 ; CHECK-LABEL: excess_soft_clause_reg_pressure:
 ; CHECK:  BB0_1: ; %for.cond28.preheader
+; CHECK:         s_load_dwordx16
+; CHECK-NEXT:    s_load_dwordx16
+
 ; CHECK:         global_load_dword
 ; CHECK-NEXT:    global_load_dword
 ; CHECK-NEXT:    global_load_dword
@@ -10,8 +13,6 @@ define protected amdgpu_kernel void @excess_soft_clause_reg_pressure(float addrs
 
 ; CHECK:         s_load_dwordx16
 ; CHECK-NEXT:    s_load_dwordx16
-; CHECK-NEXT:    s_load_dwordx16
-; CHECK-NEXT:    s_load_dwordx16
 
 ; CHECK-NOT: v_writelane_b32
 ; CHECK-NOT: v_readlane_b32


        


More information about the llvm-commits mailing list