[llvm] 6117794 - [AMDGPU] Use MUBUF instructions for global address space access
    Tony Tye via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu Dec 24 02:16:56 PST 2020
    
    
  
Author: Praveen Velliengiri
Date: 2020-12-24T10:13:04Z
New Revision: 61177943c9c762c9dd3b43909c5b0d83fdc6dc5f
URL: https://github.com/llvm/llvm-project/commit/61177943c9c762c9dd3b43909c5b0d83fdc6dc5f
DIFF: https://github.com/llvm/llvm-project/commit/61177943c9c762c9dd3b43909c5b0d83fdc6dc5f.diff
LOG: [AMDGPU] Use MUBUF instructions for global address space access
Currently, the compiler crashes in instruction selection of global
load/stores in gfx600 due to the lack of FLAT instructions. This patch
fix the crash by selecting MUBUF instructions for global load/stores
in gfx600.
Authored-by: Praveen Velliengiri <Praveen.Velliengiri at amd.com>
Reviewed by: t-tye
Differential revision: https://reviews.llvm.org/D92483
Added: 
    
Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
Removed: 
    
################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 3dbdfa7764dc..f9246000f3eb 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -104,6 +104,11 @@ specify the AMDGPU processor together with optional target features. See
 :ref:`amdgpu-target-id` and :ref:`amdgpu-target-features` for AMD GPU target
 specific information.
 
+Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following exceptions:
+
+* ``amdhsa`` is not supported in ``r600`` architecture (see :ref:`amdgpu-architecture-table`).
+
+
   .. table:: AMDGPU Processors
      :name: amdgpu-processor-table
 
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 8015eb5734e3..2b84107dcbea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -89,7 +89,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
   // Assuming ECC is enabled is the conservative default.
   SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
 
-  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
+  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by default
+  if (isAmdHsaOS())
     FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
 
   FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
@@ -108,15 +109,36 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
   ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
 
+  // Implement the "generic" processors, which acts as the default when no
+  // generation features are enabled (e.g for -mcpu=''). HSA OS defaults to
+  // the first amdgcn target that supports flat addressing. Other OSes defaults
+  // to the first amdgcn target.
+  if (Gen == AMDGPUSubtarget::INVALID) {
+     Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
+                                        : AMDGPUSubtarget::SOUTHERN_ISLANDS;
+  }
+
   // We don't support FP64 for EG/NI atm.
   assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
 
-  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
-  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
-  // variants of MUBUF instructions.
-  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+  // Targets must either support 64-bit offsets for MUBUF instructions, and/or
+  // support flat operations, otherwise they cannot access a 64-bit global
+  // address space
+  assert(hasAddr64() || hasFlat());
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
+  // that do not support ADDR64 variants of MUBUF instructions. Such targets
+  // cannot use a 64 bit offset with a MUBUF instruction to access the global
+  // address space
+  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
+    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
     FlatForGlobal = true;
   }
+  // Unless +-flat-for-global is specified, use MUBUF instructions for global
+  // address space access if flat operations are not available.
+  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
+    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
+    FlatForGlobal = false;
+  }
 
   // Set defaults if needed.
   if (MaxPrivateElementSize == 0)
@@ -182,7 +204,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
     AMDGPUSubtarget(TT),
     TargetTriple(TT),
-    Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
+    Gen(INVALID),
     InstrItins(getInstrItineraryForCPU(GPU)),
     LDSBankCount(0),
     MaxPrivateElementSize(0),
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0b0e4a5bea63..80d314ee3c52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -50,15 +50,16 @@ class StringRef;
 class AMDGPUSubtarget {
 public:
   enum Generation {
-    R600 = 0,
-    R700 = 1,
-    EVERGREEN = 2,
-    NORTHERN_ISLANDS = 3,
-    SOUTHERN_ISLANDS = 4,
-    SEA_ISLANDS = 5,
-    VOLCANIC_ISLANDS = 6,
-    GFX9 = 7,
-    GFX10 = 8
+    INVALID = 0,
+    R600 = 1,
+    R700 = 2,
+    EVERGREEN = 3,
+    NORTHERN_ISLANDS = 4,
+    SOUTHERN_ISLANDS = 5,
+    SEA_ISLANDS = 6,
+    VOLCANIC_ISLANDS = 7,
+    GFX9 = 8,
+    GFX10 = 9
   };
 
 private:
@@ -527,6 +528,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
   }
 
+  bool hasFlat() const {
+    return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
+  }
+
   // Return true if the target only has the reverse operand versions of VALU
   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
   bool hasOnlyRevVALUShifts() const {
diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 4d03f827ac2f..a9ebd798f33a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -533,10 +533,7 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
 define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
 ; HSA-LABEL: @kern_lds_ptr_si(
 ; HSA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT:    [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
-; HSA-NEXT:    [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
+; HSA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_lds_ptr_si(
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index e7ca50248d48..85dfa08bc461 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
index 679a56b3805f..9beeb850b8d2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_agent_unordered_load(
 ; GFX6-LABEL: global_agent_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -82,8 +82,8 @@ entry:
 define amdgpu_kernel void @global_agent_monotonic_load(
 ; GFX6-LABEL: global_agent_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -156,8 +156,8 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_load(
 ; GFX6-LABEL: global_agent_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -236,8 +236,8 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_load(
 ; GFX6-LABEL: global_agent_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -321,12 +321,12 @@ entry:
 define amdgpu_kernel void @global_agent_unordered_store(
 ; GFX6-LABEL: global_agent_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -382,12 +382,12 @@ entry:
 define amdgpu_kernel void @global_agent_monotonic_store(
 ; GFX6-LABEL: global_agent_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -443,12 +443,12 @@ entry:
 define amdgpu_kernel void @global_agent_release_store(
 ; GFX6-LABEL: global_agent_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -511,12 +511,12 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_store(
 ; GFX6-LABEL: global_agent_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -579,13 +579,13 @@ entry:
 define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
 ; GFX6-LABEL: global_agent_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_monotonic_atomicrmw:
@@ -640,13 +640,13 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_atomicrmw(
 ; GFX6-LABEL: global_agent_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -714,14 +714,14 @@ entry:
 define amdgpu_kernel void @global_agent_release_atomicrmw(
 ; GFX6-LABEL: global_agent_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_release_atomicrmw:
@@ -782,14 +782,14 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_agent_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -863,14 +863,14 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_agent_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -944,16 +944,16 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acquire_ret_atomicrmw:
@@ -1022,17 +1022,17 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acq_rel_ret_atomicrmw:
@@ -1107,17 +1107,17 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_seq_cst_ret_atomicrmw:
@@ -1192,14 +1192,14 @@ entry:
 define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_monotonic_monotonic_cmpxchg:
@@ -1261,14 +1261,14 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1343,15 +1343,15 @@ entry:
 define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_release_monotonic_cmpxchg:
@@ -1419,15 +1419,15 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1508,15 +1508,15 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1597,14 +1597,14 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1679,15 +1679,15 @@ entry:
 define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1768,15 +1768,15 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1857,15 +1857,15 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1946,15 +1946,15 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -2035,17 +2035,17 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
@@ -2124,18 +2124,18 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
@@ -2220,18 +2220,18 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
@@ -2316,17 +2316,17 @@ entry:
 define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
@@ -2405,18 +2405,18 @@ entry:
 define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_release_acquire_ret_cmpxchg:
@@ -2501,18 +2501,18 @@ entry:
 define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
@@ -2597,18 +2597,18 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
@@ -2693,18 +2693,18 @@ entry:
 define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
@@ -2789,8 +2789,8 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_unordered_load(
 ; GFX6-LABEL: global_agent_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2863,8 +2863,8 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_monotonic_load(
 ; GFX6-LABEL: global_agent_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2937,8 +2937,8 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_load(
 ; GFX6-LABEL: global_agent_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -3017,8 +3017,8 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -3102,12 +3102,12 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_unordered_store(
 ; GFX6-LABEL: global_agent_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3163,12 +3163,12 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_monotonic_store(
 ; GFX6-LABEL: global_agent_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3224,12 +3224,12 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_release_store(
 ; GFX6-LABEL: global_agent_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -3292,12 +3292,12 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -3360,13 +3360,13 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_monotonic_atomicrmw:
@@ -3421,13 +3421,13 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3493,14 +3493,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_release_atomicrmw:
@@ -3561,14 +3561,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3640,14 +3640,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3719,16 +3719,16 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
@@ -3797,17 +3797,17 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
@@ -3882,17 +3882,17 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
@@ -3967,14 +3967,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
@@ -4036,14 +4036,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4116,15 +4116,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
@@ -4192,15 +4192,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4279,15 +4279,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4366,14 +4366,14 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4446,15 +4446,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4533,15 +4533,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4620,15 +4620,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4707,15 +4707,15 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4794,17 +4794,17 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
@@ -4883,18 +4883,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
@@ -4979,18 +4979,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
@@ -5075,17 +5075,17 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
@@ -5164,18 +5164,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
@@ -5260,18 +5260,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
@@ -5356,18 +5356,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
@@ -5452,18 +5452,18 @@ entry:
 define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index 58d803761fa7..d88b87626f08 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_nontemporal_load_0(
 ; GFX6-LABEL: global_nontemporal_load_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
@@ -77,8 +77,8 @@ entry:
 define amdgpu_kernel void @global_nontemporal_load_1(
 ; GFX6-LABEL: global_nontemporal_load_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -157,8 +157,8 @@ entry:
 define amdgpu_kernel void @global_nontemporal_store_0(
 ; GFX6-LABEL: global_nontemporal_store_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
@@ -226,8 +226,8 @@ entry:
 define amdgpu_kernel void @global_nontemporal_store_1(
 ; GFX6-LABEL: global_nontemporal_store_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
index dd50b95e8691..5cb013b6c2bc 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_singlethread_unordered_load(
 ; GFX6-LABEL: global_singlethread_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -82,8 +82,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_monotonic_load(
 ; GFX6-LABEL: global_singlethread_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -156,8 +156,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_load(
 ; GFX6-LABEL: global_singlethread_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -230,8 +230,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_load(
 ; GFX6-LABEL: global_singlethread_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -304,12 +304,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_unordered_store(
 ; GFX6-LABEL: global_singlethread_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -365,12 +365,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_monotonic_store(
 ; GFX6-LABEL: global_singlethread_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -426,12 +426,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_release_store(
 ; GFX6-LABEL: global_singlethread_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -487,12 +487,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_store(
 ; GFX6-LABEL: global_singlethread_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -548,13 +548,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
 ; GFX6-LABEL: global_singlethread_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_monotonic_atomicrmw:
@@ -609,13 +609,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
 ; GFX6-LABEL: global_singlethread_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_atomicrmw:
@@ -670,13 +670,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_release_atomicrmw(
 ; GFX6-LABEL: global_singlethread_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_release_atomicrmw:
@@ -731,13 +731,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_singlethread_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_atomicrmw:
@@ -792,13 +792,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_singlethread_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_atomicrmw:
@@ -853,15 +853,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_ret_atomicrmw:
@@ -925,15 +925,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
@@ -997,15 +997,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
@@ -1069,14 +1069,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
@@ -1138,14 +1138,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
@@ -1207,14 +1207,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_release_monotonic_cmpxchg:
@@ -1276,14 +1276,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
@@ -1345,14 +1345,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
@@ -1414,14 +1414,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_acquire_cmpxchg:
@@ -1483,14 +1483,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_release_acquire_cmpxchg:
@@ -1552,14 +1552,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
@@ -1621,14 +1621,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
@@ -1690,14 +1690,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
@@ -1759,16 +1759,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
@@ -1842,16 +1842,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
@@ -1925,16 +1925,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
@@ -2008,16 +2008,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
@@ -2091,16 +2091,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
@@ -2174,16 +2174,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
@@ -2257,16 +2257,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
@@ -2340,16 +2340,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
@@ -2423,8 +2423,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_unordered_load(
 ; GFX6-LABEL: global_singlethread_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2497,8 +2497,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_monotonic_load(
 ; GFX6-LABEL: global_singlethread_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2571,8 +2571,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_load(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2645,8 +2645,8 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2719,12 +2719,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
 ; GFX6-LABEL: global_singlethread_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2780,12 +2780,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
 ; GFX6-LABEL: global_singlethread_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2841,12 +2841,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_release_store(
 ; GFX6-LABEL: global_singlethread_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2902,12 +2902,12 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2963,13 +2963,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
@@ -3024,13 +3024,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_atomicrmw:
@@ -3085,13 +3085,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_release_atomicrmw:
@@ -3146,13 +3146,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
@@ -3207,13 +3207,13 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
@@ -3268,15 +3268,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
@@ -3340,15 +3340,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
@@ -3412,15 +3412,15 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
@@ -3484,14 +3484,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
@@ -3553,14 +3553,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
@@ -3622,14 +3622,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
@@ -3691,14 +3691,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
@@ -3760,14 +3760,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
@@ -3829,14 +3829,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
@@ -3898,14 +3898,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
@@ -3967,14 +3967,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
@@ -4036,14 +4036,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
@@ -4105,14 +4105,14 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
@@ -4174,16 +4174,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
@@ -4257,16 +4257,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
@@ -4340,16 +4340,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
@@ -4423,16 +4423,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
@@ -4506,16 +4506,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
@@ -4589,16 +4589,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
@@ -4672,16 +4672,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
@@ -4755,16 +4755,16 @@ entry:
 define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
index e4b0b29e7437..087445257795 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_system_unordered_load(
 ; GFX6-LABEL: global_system_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -82,8 +82,8 @@ entry:
 define amdgpu_kernel void @global_system_monotonic_load(
 ; GFX6-LABEL: global_system_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -156,8 +156,8 @@ entry:
 define amdgpu_kernel void @global_system_acquire_load(
 ; GFX6-LABEL: global_system_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -236,8 +236,8 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_load(
 ; GFX6-LABEL: global_system_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -321,12 +321,12 @@ entry:
 define amdgpu_kernel void @global_system_unordered_store(
 ; GFX6-LABEL: global_system_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -382,12 +382,12 @@ entry:
 define amdgpu_kernel void @global_system_monotonic_store(
 ; GFX6-LABEL: global_system_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -443,12 +443,12 @@ entry:
 define amdgpu_kernel void @global_system_release_store(
 ; GFX6-LABEL: global_system_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -511,12 +511,12 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_store(
 ; GFX6-LABEL: global_system_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -579,13 +579,13 @@ entry:
 define amdgpu_kernel void @global_system_monotonic_atomicrmw(
 ; GFX6-LABEL: global_system_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_monotonic_atomicrmw:
@@ -640,13 +640,13 @@ entry:
 define amdgpu_kernel void @global_system_acquire_atomicrmw(
 ; GFX6-LABEL: global_system_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -714,14 +714,14 @@ entry:
 define amdgpu_kernel void @global_system_release_atomicrmw(
 ; GFX6-LABEL: global_system_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_release_atomicrmw:
@@ -782,14 +782,14 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_system_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -863,14 +863,14 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_system_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -944,16 +944,16 @@ entry:
 define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_system_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acquire_ret_atomicrmw:
@@ -1022,17 +1022,17 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw:
@@ -1107,17 +1107,17 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_seq_cst_ret_atomicrmw:
@@ -1192,14 +1192,14 @@ entry:
 define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg:
@@ -1261,14 +1261,14 @@ entry:
 define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1343,15 +1343,15 @@ entry:
 define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_release_monotonic_cmpxchg:
@@ -1419,15 +1419,15 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1508,15 +1508,15 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1597,14 +1597,14 @@ entry:
 define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1679,15 +1679,15 @@ entry:
 define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1768,15 +1768,15 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1857,15 +1857,15 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -1946,15 +1946,15 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -2035,17 +2035,17 @@ entry:
 define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
@@ -2124,18 +2124,18 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
@@ -2220,18 +2220,18 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
@@ -2316,17 +2316,17 @@ entry:
 define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg:
@@ -2405,18 +2405,18 @@ entry:
 define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg:
@@ -2501,18 +2501,18 @@ entry:
 define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
@@ -2597,18 +2597,18 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
@@ -2693,18 +2693,18 @@ entry:
 define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
@@ -2789,8 +2789,8 @@ entry:
 define amdgpu_kernel void @global_system_one_as_unordered_load(
 ; GFX6-LABEL: global_system_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2863,8 +2863,8 @@ entry:
 define amdgpu_kernel void @global_system_one_as_monotonic_load(
 ; GFX6-LABEL: global_system_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2937,8 +2937,8 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_load(
 ; GFX6-LABEL: global_system_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -3017,8 +3017,8 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_load(
 ; GFX6-LABEL: global_system_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -3102,12 +3102,12 @@ entry:
 define amdgpu_kernel void @global_system_one_as_unordered_store(
 ; GFX6-LABEL: global_system_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3163,12 +3163,12 @@ entry:
 define amdgpu_kernel void @global_system_one_as_monotonic_store(
 ; GFX6-LABEL: global_system_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3224,12 +3224,12 @@ entry:
 define amdgpu_kernel void @global_system_one_as_release_store(
 ; GFX6-LABEL: global_system_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -3292,12 +3292,12 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_store(
 ; GFX6-LABEL: global_system_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -3360,13 +3360,13 @@ entry:
 define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw:
@@ -3421,13 +3421,13 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3493,14 +3493,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_release_atomicrmw:
@@ -3561,14 +3561,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3640,14 +3640,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -3719,16 +3719,16 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw:
@@ -3797,17 +3797,17 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
@@ -3882,17 +3882,17 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
@@ -3967,14 +3967,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
@@ -4036,14 +4036,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4116,15 +4116,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg:
@@ -4192,15 +4192,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4279,15 +4279,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4366,14 +4366,14 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4446,15 +4446,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4533,15 +4533,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4620,15 +4620,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4707,15 +4707,15 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
 ; GFX6-NEXT:    s_endpgm
@@ -4794,17 +4794,17 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
@@ -4883,18 +4883,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
@@ -4979,18 +4979,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
@@ -5075,17 +5075,17 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
@@ -5164,18 +5164,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
@@ -5260,18 +5260,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
@@ -5356,18 +5356,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
@@ -5452,18 +5452,18 @@ entry:
 define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
index 9aab2f3b6123..cb32f3a3e310 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_wavefront_unordered_load(
 ; GFX6-LABEL: global_wavefront_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -82,8 +82,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_monotonic_load(
 ; GFX6-LABEL: global_wavefront_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -156,8 +156,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_load(
 ; GFX6-LABEL: global_wavefront_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -230,8 +230,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_load(
 ; GFX6-LABEL: global_wavefront_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -304,12 +304,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_unordered_store(
 ; GFX6-LABEL: global_wavefront_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -365,12 +365,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_monotonic_store(
 ; GFX6-LABEL: global_wavefront_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -426,12 +426,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_release_store(
 ; GFX6-LABEL: global_wavefront_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -487,12 +487,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_store(
 ; GFX6-LABEL: global_wavefront_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -548,13 +548,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
 ; GFX6-LABEL: global_wavefront_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_monotonic_atomicrmw:
@@ -609,13 +609,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
 ; GFX6-LABEL: global_wavefront_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_atomicrmw:
@@ -670,13 +670,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_release_atomicrmw(
 ; GFX6-LABEL: global_wavefront_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_release_atomicrmw:
@@ -731,13 +731,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_wavefront_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_atomicrmw:
@@ -792,13 +792,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_wavefront_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_atomicrmw:
@@ -853,15 +853,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_ret_atomicrmw:
@@ -925,15 +925,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
@@ -997,15 +997,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
@@ -1069,14 +1069,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
@@ -1138,14 +1138,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
@@ -1207,14 +1207,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_release_monotonic_cmpxchg:
@@ -1276,14 +1276,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
@@ -1345,14 +1345,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
@@ -1414,14 +1414,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_acquire_cmpxchg:
@@ -1483,14 +1483,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_release_acquire_cmpxchg:
@@ -1552,14 +1552,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
@@ -1621,14 +1621,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
@@ -1690,14 +1690,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
@@ -1759,16 +1759,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
@@ -1842,16 +1842,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
@@ -1925,16 +1925,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
@@ -2008,16 +2008,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
@@ -2091,16 +2091,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
@@ -2174,16 +2174,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
@@ -2257,16 +2257,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
@@ -2340,16 +2340,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
@@ -2423,8 +2423,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_unordered_load(
 ; GFX6-LABEL: global_wavefront_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2497,8 +2497,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_monotonic_load(
 ; GFX6-LABEL: global_wavefront_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2571,8 +2571,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_load(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2645,8 +2645,8 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2719,12 +2719,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
 ; GFX6-LABEL: global_wavefront_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2780,12 +2780,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
 ; GFX6-LABEL: global_wavefront_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2841,12 +2841,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_release_store(
 ; GFX6-LABEL: global_wavefront_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2902,12 +2902,12 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2963,13 +2963,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
@@ -3024,13 +3024,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_atomicrmw:
@@ -3085,13 +3085,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_release_atomicrmw:
@@ -3146,13 +3146,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
@@ -3207,13 +3207,13 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
@@ -3268,15 +3268,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
@@ -3340,15 +3340,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
@@ -3412,15 +3412,15 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
@@ -3484,14 +3484,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
@@ -3553,14 +3553,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
@@ -3622,14 +3622,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
@@ -3691,14 +3691,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
@@ -3760,14 +3760,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
@@ -3829,14 +3829,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
@@ -3898,14 +3898,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
@@ -3967,14 +3967,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
@@ -4036,14 +4036,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
@@ -4105,14 +4105,14 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
@@ -4174,16 +4174,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
@@ -4257,16 +4257,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
@@ -4340,16 +4340,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
@@ -4423,16 +4423,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
@@ -4506,16 +4506,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
@@ -4589,16 +4589,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
@@ -4672,16 +4672,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
@@ -4755,16 +4755,16 @@ entry:
 define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
index 1fd85e6acb9f..a1ec279d1c24 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,8 +8,8 @@
 define amdgpu_kernel void @global_workgroup_unordered_load(
 ; GFX6-LABEL: global_workgroup_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -82,8 +82,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_monotonic_load(
 ; GFX6-LABEL: global_workgroup_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -156,8 +156,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_load(
 ; GFX6-LABEL: global_workgroup_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -231,8 +231,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_load(
 ; GFX6-LABEL: global_workgroup_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -310,12 +310,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_unordered_store(
 ; GFX6-LABEL: global_workgroup_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -371,12 +371,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_monotonic_store(
 ; GFX6-LABEL: global_workgroup_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -432,12 +432,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_release_store(
 ; GFX6-LABEL: global_workgroup_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -499,12 +499,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_store(
 ; GFX6-LABEL: global_workgroup_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -566,13 +566,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
 ; GFX6-LABEL: global_workgroup_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_monotonic_atomicrmw:
@@ -627,13 +627,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
 ; GFX6-LABEL: global_workgroup_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -695,14 +695,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_release_atomicrmw(
 ; GFX6-LABEL: global_workgroup_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_release_atomicrmw:
@@ -762,14 +762,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_workgroup_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -836,14 +836,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_workgroup_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -910,15 +910,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acquire_ret_atomicrmw:
@@ -983,16 +983,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
@@ -1062,16 +1062,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
@@ -1141,14 +1141,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
@@ -1210,14 +1210,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1286,15 +1286,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_release_monotonic_cmpxchg:
@@ -1361,15 +1361,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1443,15 +1443,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1525,14 +1525,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1601,15 +1601,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1683,15 +1683,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1765,15 +1765,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1847,15 +1847,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -1929,16 +1929,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
@@ -2014,17 +2014,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
@@ -2105,17 +2105,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
@@ -2196,16 +2196,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
@@ -2281,17 +2281,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
@@ -2372,17 +2372,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
@@ -2463,17 +2463,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
@@ -2554,17 +2554,17 @@ entry:
 define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
@@ -2645,8 +2645,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_unordered_load(
 ; GFX6-LABEL: global_workgroup_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2719,8 +2719,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_monotonic_load(
 ; GFX6-LABEL: global_workgroup_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2793,8 +2793,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_load(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2868,8 +2868,8 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
@@ -2944,12 +2944,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
 ; GFX6-LABEL: global_workgroup_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3005,12 +3005,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
 ; GFX6-LABEL: global_workgroup_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3066,12 +3066,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_release_store(
 ; GFX6-LABEL: global_workgroup_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3129,12 +3129,12 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3192,13 +3192,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
@@ -3253,13 +3253,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_atomicrmw:
@@ -3316,13 +3316,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_release_atomicrmw:
@@ -3379,13 +3379,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
@@ -3444,13 +3444,13 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
@@ -3509,15 +3509,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
@@ -3582,15 +3582,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
@@ -3657,15 +3657,15 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s4, s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
@@ -3732,14 +3732,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
@@ -3801,14 +3801,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
@@ -3872,14 +3872,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
@@ -3943,14 +3943,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
@@ -4016,14 +4016,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
@@ -4089,14 +4089,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
@@ -4160,14 +4160,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
@@ -4233,14 +4233,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
@@ -4306,14 +4306,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
@@ -4379,14 +4379,14 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
@@ -4452,16 +4452,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
@@ -4536,16 +4536,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
@@ -4622,16 +4622,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
@@ -4708,16 +4708,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
@@ -4792,16 +4792,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
@@ -4878,16 +4878,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
@@ -4964,16 +4964,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
@@ -5050,16 +5050,16 @@ entry:
 define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
index 49499b7570e9..afb020f5314c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_agent_unordered_load(
 ; GFX6-LABEL: local_agent_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -74,13 +74,13 @@ entry:
 define amdgpu_kernel void @local_agent_monotonic_load(
 ; GFX6-LABEL: local_agent_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -140,15 +140,15 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_load(
 ; GFX6-LABEL: local_agent_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -212,16 +212,16 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_load(
 ; GFX6-LABEL: local_agent_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -291,12 +291,12 @@ entry:
 define amdgpu_kernel void @local_agent_unordered_store(
 ; GFX6-LABEL: local_agent_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -346,12 +346,12 @@ entry:
 define amdgpu_kernel void @local_agent_monotonic_store(
 ; GFX6-LABEL: local_agent_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -401,12 +401,12 @@ entry:
 define amdgpu_kernel void @local_agent_release_store(
 ; GFX6-LABEL: local_agent_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -463,12 +463,12 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_store(
 ; GFX6-LABEL: local_agent_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -525,12 +525,12 @@ entry:
 define amdgpu_kernel void @local_agent_monotonic_atomicrmw(
 ; GFX6-LABEL: local_agent_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -580,12 +580,12 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_atomicrmw(
 ; GFX6-LABEL: local_agent_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
@@ -648,12 +648,12 @@ entry:
 define amdgpu_kernel void @local_agent_release_atomicrmw(
 ; GFX6-LABEL: local_agent_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -710,12 +710,12 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_agent_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -785,12 +785,12 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_agent_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -860,12 +860,12 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
@@ -934,12 +934,12 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1015,12 +1015,12 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1096,8 +1096,8 @@ entry:
 define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1158,8 +1158,8 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1233,8 +1233,8 @@ entry:
 define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1302,8 +1302,8 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1384,8 +1384,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1466,8 +1466,8 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1541,8 +1541,8 @@ entry:
 define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1623,8 +1623,8 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1705,8 +1705,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1787,8 +1787,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1869,8 +1869,8 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1949,8 +1949,8 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2036,8 +2036,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2123,8 +2123,8 @@ entry:
 define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2203,8 +2203,8 @@ entry:
 define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2290,8 +2290,8 @@ entry:
 define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2377,8 +2377,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2464,8 +2464,8 @@ entry:
 define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2551,13 +2551,13 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_unordered_load(
 ; GFX6-LABEL: local_agent_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2617,13 +2617,13 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_monotonic_load(
 ; GFX6-LABEL: local_agent_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2683,13 +2683,13 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_load(
 ; GFX6-LABEL: local_agent_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2749,13 +2749,13 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_load(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2815,12 +2815,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_unordered_store(
 ; GFX6-LABEL: local_agent_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2870,12 +2870,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_monotonic_store(
 ; GFX6-LABEL: local_agent_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2925,12 +2925,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_release_store(
 ; GFX6-LABEL: local_agent_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2980,12 +2980,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_store(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3035,12 +3035,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3090,12 +3090,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3145,12 +3145,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_release_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3200,12 +3200,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3255,12 +3255,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3310,12 +3310,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3376,12 +3376,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3442,12 +3442,12 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3508,8 +3508,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3570,8 +3570,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3632,8 +3632,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3694,8 +3694,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3756,8 +3756,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3818,8 +3818,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3880,8 +3880,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3942,8 +3942,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4004,8 +4004,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4066,8 +4066,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4128,8 +4128,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4202,8 +4202,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4276,8 +4276,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4350,8 +4350,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4424,8 +4424,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4498,8 +4498,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4572,8 +4572,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4646,8 +4646,8 @@ entry:
 define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
index db3b8b2a5f22..9885036a78c8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_nontemporal_load_0(
 ; GFX6-LABEL: local_nontemporal_load_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
 ; GFX6-NEXT:    s_mov_b32 m0, -1
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -83,13 +83,13 @@ entry:
 define amdgpu_kernel void @local_nontemporal_load_1(
 ; GFX6-LABEL: local_nontemporal_load_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    s_mov_b32 m0, -1
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -163,14 +163,14 @@ entry:
 define amdgpu_kernel void @local_nontemporal_store_0(
 ; GFX6-LABEL: local_nontemporal_store_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x2
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_load_dword s1, s[2:3], 0x0
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -235,15 +235,15 @@ entry:
 define amdgpu_kernel void @local_nontemporal_store_1(
 ; GFX6-LABEL: local_nontemporal_store_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_load_dword s1, s[2:3], 0x0
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
index cb45b58da5ac..4e521c3dce8b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_singlethread_unordered_load(
 ; GFX6-LABEL: local_singlethread_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -74,13 +74,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_monotonic_load(
 ; GFX6-LABEL: local_singlethread_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -140,13 +140,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_load(
 ; GFX6-LABEL: local_singlethread_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -206,13 +206,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_load(
 ; GFX6-LABEL: local_singlethread_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -272,12 +272,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_unordered_store(
 ; GFX6-LABEL: local_singlethread_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -327,12 +327,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_monotonic_store(
 ; GFX6-LABEL: local_singlethread_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -382,12 +382,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_release_store(
 ; GFX6-LABEL: local_singlethread_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -437,12 +437,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_store(
 ; GFX6-LABEL: local_singlethread_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -492,12 +492,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw(
 ; GFX6-LABEL: local_singlethread_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -547,12 +547,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_atomicrmw(
 ; GFX6-LABEL: local_singlethread_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -602,12 +602,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_release_atomicrmw(
 ; GFX6-LABEL: local_singlethread_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -657,12 +657,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_singlethread_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -712,12 +712,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_singlethread_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -767,12 +767,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -833,12 +833,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -899,12 +899,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -965,8 +965,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1027,8 +1027,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1089,8 +1089,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1151,8 +1151,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1213,8 +1213,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1275,8 +1275,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1337,8 +1337,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1399,8 +1399,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1461,8 +1461,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1523,8 +1523,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1585,8 +1585,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1659,8 +1659,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1733,8 +1733,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1807,8 +1807,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1881,8 +1881,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1955,8 +1955,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2029,8 +2029,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2103,8 +2103,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2177,13 +2177,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_unordered_load(
 ; GFX6-LABEL: local_singlethread_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2243,13 +2243,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_monotonic_load(
 ; GFX6-LABEL: local_singlethread_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2309,13 +2309,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_load(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2375,13 +2375,13 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2441,12 +2441,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_unordered_store(
 ; GFX6-LABEL: local_singlethread_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2496,12 +2496,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_monotonic_store(
 ; GFX6-LABEL: local_singlethread_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2551,12 +2551,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_release_store(
 ; GFX6-LABEL: local_singlethread_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2606,12 +2606,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2661,12 +2661,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2716,12 +2716,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2771,12 +2771,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2826,12 +2826,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2881,12 +2881,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2936,12 +2936,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3002,12 +3002,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3068,12 +3068,12 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3134,8 +3134,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3196,8 +3196,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3258,8 +3258,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3320,8 +3320,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3382,8 +3382,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3444,8 +3444,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3506,8 +3506,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3568,8 +3568,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3630,8 +3630,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3692,8 +3692,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3754,8 +3754,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3828,8 +3828,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3902,8 +3902,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3976,8 +3976,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4050,8 +4050,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4124,8 +4124,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4198,8 +4198,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4272,8 +4272,8 @@ entry:
 define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
index 7e500d4415e6..4ef62030ef5c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_system_unordered_load(
 ; GFX6-LABEL: local_system_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -74,13 +74,13 @@ entry:
 define amdgpu_kernel void @local_system_monotonic_load(
 ; GFX6-LABEL: local_system_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -140,15 +140,15 @@ entry:
 define amdgpu_kernel void @local_system_acquire_load(
 ; GFX6-LABEL: local_system_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -212,16 +212,16 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_load(
 ; GFX6-LABEL: local_system_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -291,12 +291,12 @@ entry:
 define amdgpu_kernel void @local_system_unordered_store(
 ; GFX6-LABEL: local_system_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -346,12 +346,12 @@ entry:
 define amdgpu_kernel void @local_system_monotonic_store(
 ; GFX6-LABEL: local_system_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -401,12 +401,12 @@ entry:
 define amdgpu_kernel void @local_system_release_store(
 ; GFX6-LABEL: local_system_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -463,12 +463,12 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_store(
 ; GFX6-LABEL: local_system_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -525,12 +525,12 @@ entry:
 define amdgpu_kernel void @local_system_monotonic_atomicrmw(
 ; GFX6-LABEL: local_system_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -580,12 +580,12 @@ entry:
 define amdgpu_kernel void @local_system_acquire_atomicrmw(
 ; GFX6-LABEL: local_system_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
@@ -648,12 +648,12 @@ entry:
 define amdgpu_kernel void @local_system_release_atomicrmw(
 ; GFX6-LABEL: local_system_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -710,12 +710,12 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_system_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -785,12 +785,12 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_system_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -860,12 +860,12 @@ entry:
 define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_system_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    buffer_wbinvl1
@@ -934,12 +934,12 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_system_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1015,12 +1015,12 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_system_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1096,8 +1096,8 @@ entry:
 define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1158,8 +1158,8 @@ entry:
 define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1233,8 +1233,8 @@ entry:
 define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1302,8 +1302,8 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1384,8 +1384,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1466,8 +1466,8 @@ entry:
 define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1541,8 +1541,8 @@ entry:
 define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1623,8 +1623,8 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1705,8 +1705,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1787,8 +1787,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1869,8 +1869,8 @@ entry:
 define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1949,8 +1949,8 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2036,8 +2036,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2123,8 +2123,8 @@ entry:
 define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2203,8 +2203,8 @@ entry:
 define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2290,8 +2290,8 @@ entry:
 define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2377,8 +2377,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2464,8 +2464,8 @@ entry:
 define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2551,13 +2551,13 @@ entry:
 define amdgpu_kernel void @local_system_one_as_unordered_load(
 ; GFX6-LABEL: local_system_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2617,13 +2617,13 @@ entry:
 define amdgpu_kernel void @local_system_one_as_monotonic_load(
 ; GFX6-LABEL: local_system_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2683,13 +2683,13 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_load(
 ; GFX6-LABEL: local_system_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2749,13 +2749,13 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_load(
 ; GFX6-LABEL: local_system_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2815,12 +2815,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_unordered_store(
 ; GFX6-LABEL: local_system_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2870,12 +2870,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_monotonic_store(
 ; GFX6-LABEL: local_system_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2925,12 +2925,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_release_store(
 ; GFX6-LABEL: local_system_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2980,12 +2980,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_store(
 ; GFX6-LABEL: local_system_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3035,12 +3035,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3090,12 +3090,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3145,12 +3145,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_release_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3200,12 +3200,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3255,12 +3255,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3310,12 +3310,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3376,12 +3376,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3442,12 +3442,12 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_system_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3508,8 +3508,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3570,8 +3570,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3632,8 +3632,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3694,8 +3694,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3756,8 +3756,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3818,8 +3818,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3880,8 +3880,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3942,8 +3942,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4004,8 +4004,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4066,8 +4066,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4128,8 +4128,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4202,8 +4202,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4276,8 +4276,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4350,8 +4350,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4424,8 +4424,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4498,8 +4498,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4572,8 +4572,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4646,8 +4646,8 @@ entry:
 define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
index 1bc5c14992e4..cb5265b8f085 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_wavefront_unordered_load(
 ; GFX6-LABEL: local_wavefront_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -74,13 +74,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_monotonic_load(
 ; GFX6-LABEL: local_wavefront_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -140,13 +140,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_load(
 ; GFX6-LABEL: local_wavefront_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -206,13 +206,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_load(
 ; GFX6-LABEL: local_wavefront_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -272,12 +272,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_unordered_store(
 ; GFX6-LABEL: local_wavefront_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -327,12 +327,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_monotonic_store(
 ; GFX6-LABEL: local_wavefront_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -382,12 +382,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_release_store(
 ; GFX6-LABEL: local_wavefront_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -437,12 +437,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_store(
 ; GFX6-LABEL: local_wavefront_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -492,12 +492,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw(
 ; GFX6-LABEL: local_wavefront_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -547,12 +547,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_atomicrmw(
 ; GFX6-LABEL: local_wavefront_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -602,12 +602,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_release_atomicrmw(
 ; GFX6-LABEL: local_wavefront_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -657,12 +657,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_wavefront_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -712,12 +712,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_wavefront_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -767,12 +767,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -833,12 +833,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -899,12 +899,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -965,8 +965,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1027,8 +1027,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1089,8 +1089,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1151,8 +1151,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1213,8 +1213,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1275,8 +1275,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1337,8 +1337,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1399,8 +1399,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1461,8 +1461,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1523,8 +1523,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1585,8 +1585,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1659,8 +1659,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1733,8 +1733,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1807,8 +1807,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1881,8 +1881,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1955,8 +1955,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2029,8 +2029,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2103,8 +2103,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2177,13 +2177,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_unordered_load(
 ; GFX6-LABEL: local_wavefront_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2243,13 +2243,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_monotonic_load(
 ; GFX6-LABEL: local_wavefront_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2309,13 +2309,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_load(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2375,13 +2375,13 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2441,12 +2441,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_unordered_store(
 ; GFX6-LABEL: local_wavefront_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2496,12 +2496,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_monotonic_store(
 ; GFX6-LABEL: local_wavefront_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2551,12 +2551,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_release_store(
 ; GFX6-LABEL: local_wavefront_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2606,12 +2606,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2661,12 +2661,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2716,12 +2716,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2771,12 +2771,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2826,12 +2826,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2881,12 +2881,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2936,12 +2936,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3002,12 +3002,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3068,12 +3068,12 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3134,8 +3134,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3196,8 +3196,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3258,8 +3258,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3320,8 +3320,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3382,8 +3382,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3444,8 +3444,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3506,8 +3506,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3568,8 +3568,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3630,8 +3630,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3692,8 +3692,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3754,8 +3754,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3828,8 +3828,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3902,8 +3902,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3976,8 +3976,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4050,8 +4050,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4124,8 +4124,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4198,8 +4198,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4272,8 +4272,8 @@ entry:
 define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
index 1823f3ad8882..deca37d76f7d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,13 +8,13 @@
 define amdgpu_kernel void @local_workgroup_unordered_load(
 ; GFX6-LABEL: local_workgroup_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -74,13 +74,13 @@ entry:
 define amdgpu_kernel void @local_workgroup_monotonic_load(
 ; GFX6-LABEL: local_workgroup_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -140,14 +140,14 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_load(
 ; GFX6-LABEL: local_workgroup_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -207,15 +207,15 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_load(
 ; GFX6-LABEL: local_workgroup_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_read_b32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -280,12 +280,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_unordered_store(
 ; GFX6-LABEL: local_workgroup_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -335,12 +335,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_monotonic_store(
 ; GFX6-LABEL: local_workgroup_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -390,12 +390,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_release_store(
 ; GFX6-LABEL: local_workgroup_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -451,12 +451,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_store(
 ; GFX6-LABEL: local_workgroup_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -512,12 +512,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw(
 ; GFX6-LABEL: local_workgroup_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -567,12 +567,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
 ; GFX6-LABEL: local_workgroup_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_endpgm
@@ -629,12 +629,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_release_atomicrmw(
 ; GFX6-LABEL: local_workgroup_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
@@ -690,12 +690,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_workgroup_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -758,12 +758,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_workgroup_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -826,12 +826,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -894,12 +894,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -968,12 +968,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1042,8 +1042,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1104,8 +1104,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1173,8 +1173,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1241,8 +1241,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1316,8 +1316,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1391,8 +1391,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1460,8 +1460,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1535,8 +1535,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1610,8 +1610,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1685,8 +1685,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1760,8 +1760,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1835,8 +1835,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1916,8 +1916,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -1997,8 +1997,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2072,8 +2072,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2153,8 +2153,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2234,8 +2234,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2315,8 +2315,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -2396,13 +2396,13 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_unordered_load(
 ; GFX6-LABEL: local_workgroup_one_as_unordered_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2462,13 +2462,13 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_monotonic_load(
 ; GFX6-LABEL: local_workgroup_one_as_monotonic_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2528,13 +2528,13 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_load(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2594,13 +2594,13 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_load:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    ds_read_b32 v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v1, v0
 ; GFX6-NEXT:    s_endpgm
@@ -2660,12 +2660,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_unordered_store(
 ; GFX6-LABEL: local_workgroup_one_as_unordered_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2715,12 +2715,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_monotonic_store(
 ; GFX6-LABEL: local_workgroup_one_as_monotonic_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2770,12 +2770,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_release_store(
 ; GFX6-LABEL: local_workgroup_one_as_release_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2825,12 +2825,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_store:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s2
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NEXT:    ds_write_b32 v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2880,12 +2880,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_monotonic_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2935,12 +2935,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -2990,12 +2990,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_release_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3045,12 +3045,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3100,12 +3100,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3155,12 +3155,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3221,12 +3221,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3287,12 +3287,12 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s1, s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ds_write_b32 v0, v1
@@ -3353,8 +3353,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3415,8 +3415,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3477,8 +3477,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3539,8 +3539,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3601,8 +3601,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3663,8 +3663,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3725,8 +3725,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_release_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3787,8 +3787,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3849,8 +3849,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3911,8 +3911,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -3973,8 +3973,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4047,8 +4047,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4121,8 +4121,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4195,8 +4195,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4269,8 +4269,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4343,8 +4343,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4417,8 +4417,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -4491,8 +4491,8 @@ entry:
 define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX6-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xa
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x1
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index 03977d855005..03fb7d3eba4b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
@@ -8,18 +8,16 @@
 define amdgpu_kernel void @private_nontemporal_load_0(
 ; GFX6-LABEL: private_nontemporal_load_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GFX6-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s10, -1
-; GFX6-NEXT:    s_mov_b32 s11, 0xe8f000
-; GFX6-NEXT:    s_add_u32 s8, s8, s3
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GFX6-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_add_u32 s8, s8, s7
 ; GFX6-NEXT:    s_addc_u32 s9, s9, 0
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen glc slc
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -103,19 +101,17 @@ entry:
 define amdgpu_kernel void @private_nontemporal_load_1(
 ; GFX6-LABEL: private_nontemporal_load_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GFX6-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s10, -1
-; GFX6-NEXT:    s_mov_b32 s11, 0xe8f000
-; GFX6-NEXT:    s_add_u32 s8, s8, s3
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GFX6-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GFX6-NEXT:    s_load_dword s6, s[4:5], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
+; GFX6-NEXT:    s_add_u32 s8, s8, s7
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX6-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s6, v0
 ; GFX6-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen glc slc
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s3, 0x100f000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -203,20 +199,18 @@ entry:
 define amdgpu_kernel void @private_nontemporal_store_0(
 ; GFX6-LABEL: private_nontemporal_store_0:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_mov_b32 s4, SCRATCH_RSRC_DWORD0
-; GFX6-NEXT:    s_mov_b32 s5, SCRATCH_RSRC_DWORD1
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    s_mov_b32 s7, 0xe8f000
-; GFX6-NEXT:    s_add_u32 s4, s4, s3
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
-; GFX6-NEXT:    s_addc_u32 s5, s5, 0
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GFX6-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x2
+; GFX6-NEXT:    s_add_u32 s8, s8, s7
+; GFX6-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_load_dword s1, s[2:3], 0x0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s1
-; GFX6-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen glc slc
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen glc slc
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: private_nontemporal_store_0:
@@ -296,21 +290,19 @@ entry:
 define amdgpu_kernel void @private_nontemporal_store_1(
 ; GFX6-LABEL: private_nontemporal_store_1:
 ; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_mov_b32 s4, SCRATCH_RSRC_DWORD0
-; GFX6-NEXT:    s_mov_b32 s5, SCRATCH_RSRC_DWORD1
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    s_mov_b32 s7, 0xe8f000
-; GFX6-NEXT:    s_add_u32 s4, s4, s3
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GFX6-NEXT:    s_mov_b64 s[10:11], s[2:3]
+; GFX6-NEXT:    s_mov_b64 s[8:9], s[0:1]
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX6-NEXT:    s_load_dword s2, s[4:5], 0x2
+; GFX6-NEXT:    s_add_u32 s8, s8, s7
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX6-NEXT:    s_addc_u32 s5, s5, 0
+; GFX6-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_load_dword s1, s[2:3], 0x0
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen glc slc
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    buffer_store_dword v1, v0, s[8:11], 0 offen glc slc
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: private_nontemporal_store_1:
        
    
    
More information about the llvm-commits
mailing list