[llvm] 32897c0 - [AMDGPU] Specify a triple to avoid codegen changes depending on host OS

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 3 05:33:55 PST 2020


Author: Jay Foad
Date: 2020-11-03T13:33:44Z
New Revision: 32897c05ab6ba90c3496270046aa5664b37f6557

URL: https://github.com/llvm/llvm-project/commit/32897c05ab6ba90c3496270046aa5664b37f6557
DIFF: https://github.com/llvm/llvm-project/commit/32897c05ab6ba90c3496270046aa5664b37f6557.diff

LOG: [AMDGPU] Specify a triple to avoid codegen changes depending on host OS

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
    llvm/test/CodeGen/AMDGPU/ds_read2.ll
    llvm/test/CodeGen/AMDGPU/ds_write2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 2a4ede05acd6..b79359c7705f 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
@@ -10,7 +10,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
 ; CI-LABEL: write_ds_sub0_offset0_global:
 ; CI:       ; %bb.0: ; %entry
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_sub_i32_e32 v0, vcc, lds.obj@abs32@lo, v0
+; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
 ; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_write_b32 v0, v1 offset:12
@@ -19,7 +19,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
 ; GFX9-LABEL: write_ds_sub0_offset0_global:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_sub_u32_e32 v0, lds.obj@abs32@lo, v0
+; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX9-NEXT:    ds_write_b32 v0, v1 offset:12
 ; GFX9-NEXT:    s_endpgm
@@ -37,7 +37,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
 ; CI:       ; %bb.0: ; %entry
 ; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_sub_i32_e32 v0, vcc, lds.obj@abs32@lo, v0
+; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
 ; CI-NEXT:    s_mov_b64 vcc, 0
 ; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -57,7 +57,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
 ; GFX9-NEXT:    s_mov_b64 vcc, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_sub_u32_e32 v0, lds.obj@abs32@lo, v0
+; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 01d47662e1d5..15e97e3d471c 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
 
 ; FIXME: We don't get cases where the address was an SGPR because we
 ; get a copy to the address register for each one.
@@ -13,9 +13,8 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 {
 ; CI-LABEL: simple_read2_f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v1 offset1:8
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -28,8 +27,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 {
 ; GFX9-LABEL: simple_read2_f32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:8
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
@@ -51,9 +49,8 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out)
 ; CI-LABEL: simple_read2_f32_max_offset:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v1 offset1:255
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:255
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -66,8 +63,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out)
 ; GFX9-LABEL: simple_read2_f32_max_offset:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:255
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:255
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
@@ -89,15 +85,14 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0
 ; CI-LABEL: simple_read2_f32_too_far:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read_b32 v2, v1
-; CI-NEXT:    ds_read_b32 v1, v1 offset:1028
+; CI-NEXT:    ds_read_b32 v1, v0
+; CI-NEXT:    ds_read_b32 v2, v0 offset:1028
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_add_f32_e32 v2, v2, v1
+; CI-NEXT:    v_add_f32_e32 v2, v1, v2
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -105,12 +100,11 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0
 ; GFX9-LABEL: simple_read2_f32_too_far:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT:    ds_read_b32 v2, v1
-; GFX9-NEXT:    ds_read_b32 v1, v1 offset:1028
+; GFX9-NEXT:    ds_read_b32 v1, v0
+; GFX9-NEXT:    ds_read_b32 v2, v0 offset:1028
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -129,18 +123,16 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
 ; CI-LABEL: simple_read2_f32_x2:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset1:8
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
+; CI-NEXT:    ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_add_f32_e32 v4, v1, v2
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_f32_e32 v1, v1, v2
-; CI-NEXT:    v_add_f32_e32 v2, v4, v1
+; CI-NEXT:    v_add_f32_e32 v2, v3, v4
+; CI-NEXT:    v_add_f32_e32 v2, v1, v2
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -148,9 +140,8 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
 ; GFX9-LABEL: simple_read2_f32_x2:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
-; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v4 offset1:8
+; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
@@ -188,19 +179,18 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out)
 ; CI-LABEL: simple_read2_f32_x2_barrier:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset1:8
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_barrier
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
-; CI-NEXT:    v_add_f32_e32 v4, v1, v2
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
+; CI-NEXT:    v_add_f32_e32 v3, v1, v2
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset0:11 offset1:27
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_f32_e32 v1, v1, v2
-; CI-NEXT:    v_add_f32_e32 v2, v4, v1
+; CI-NEXT:    v_add_f32_e32 v2, v3, v1
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -208,11 +198,10 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out)
 ; GFX9-LABEL: simple_read2_f32_x2_barrier:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v4 offset1:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_barrier
-; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -253,18 +242,16 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)*
 ; CI-LABEL: simple_read2_f32_x2_nonzero_base:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v3, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset0:2 offset1:8
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset0:2 offset1:8
+; CI-NEXT:    ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_add_f32_e32 v4, v1, v2
-; CI-NEXT:    ds_read2_b32 v[1:2], v3 offset0:11 offset1:27
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_f32_e32 v1, v1, v2
-; CI-NEXT:    v_add_f32_e32 v2, v4, v1
+; CI-NEXT:    v_add_f32_e32 v2, v3, v4
+; CI-NEXT:    v_add_f32_e32 v2, v1, v2
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:8
 ; CI-NEXT:    s_endpgm
@@ -272,9 +259,8 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)*
 ; GFX9-LABEL: simple_read2_f32_x2_nonzero_base:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v2, lds@abs32@lo, v4
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset0:2 offset1:8
-; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:11 offset1:27
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v4 offset0:2 offset1:8
+; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
@@ -422,9 +408,8 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0
 ; CI-LABEL: read2_ptr_is_subreg_f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b32 v[1:2], v1 offset1:8
+; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -437,8 +422,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0
 ; GFX9-LABEL: read2_ptr_is_subreg_f32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v2
-; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:8
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
@@ -466,15 +450,14 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out)
 ; CI-LABEL: simple_read2_f32_volatile_0:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read_b32 v2, v1
-; CI-NEXT:    ds_read_b32 v1, v1 offset:32
+; CI-NEXT:    ds_read_b32 v1, v0
+; CI-NEXT:    ds_read_b32 v2, v0 offset:32
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_add_f32_e32 v2, v2, v1
+; CI-NEXT:    v_add_f32_e32 v2, v1, v2
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -482,12 +465,11 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out)
 ; GFX9-LABEL: simple_read2_f32_volatile_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT:    ds_read_b32 v2, v1
-; GFX9-NEXT:    ds_read_b32 v1, v1 offset:32
+; GFX9-NEXT:    ds_read_b32 v1, v0
+; GFX9-NEXT:    ds_read_b32 v2, v0 offset:32
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -506,15 +488,14 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out)
 ; CI-LABEL: simple_read2_f32_volatile_1:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v1, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read_b32 v2, v1
-; CI-NEXT:    ds_read_b32 v1, v1 offset:32
+; CI-NEXT:    ds_read_b32 v1, v0
+; CI-NEXT:    ds_read_b32 v2, v0 offset:32
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_add_f32_e32 v2, v2, v1
+; CI-NEXT:    v_add_f32_e32 v2, v1, v2
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -522,12 +503,11 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out)
 ; GFX9-LABEL: simple_read2_f32_volatile_1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_add_u32_e32 v1, lds@abs32@lo, v0
-; GFX9-NEXT:    ds_read_b32 v2, v1
-; GFX9-NEXT:    ds_read_b32 v1, v1 offset:32
+; GFX9-NEXT:    ds_read_b32 v1, v0
+; GFX9-NEXT:    ds_read_b32 v2, v0 offset:32
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_add_f32_e32 v1, v2, v1
+; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -809,9 +789,8 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 {
 ; CI-LABEL: simple_read2_f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b64 v[0:3], v0 offset1:8
+; CI-NEXT:    ds_read2_b64 v[0:3], v4 offset1:8
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -824,8 +803,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 {
 ; GFX9-LABEL: simple_read2_f64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, lds.f64@abs32@lo, v4
-; GFX9-NEXT:    ds_read2_b64 v[0:3], v0 offset1:8
+; GFX9-NEXT:    ds_read2_b64 v[0:3], v4 offset1:8
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
@@ -847,9 +825,8 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out
 ; CI-LABEL: simple_read2_f64_max_offset:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v4
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read2_b64 v[0:3], v0 offset1:255
+; CI-NEXT:    ds_read2_b64 v[0:3], v4 offset1:255
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -862,8 +839,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out
 ; GFX9-LABEL: simple_read2_f64_max_offset:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT:    v_add_u32_e32 v0, lds.f64@abs32@lo, v4
-; GFX9-NEXT:    ds_read2_b64 v[0:3], v0 offset1:255
+; GFX9-NEXT:    ds_read2_b64 v[0:3], v4 offset1:255
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
@@ -885,10 +861,9 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #
 ; CI-LABEL: simple_read2_f64_too_far:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT:    v_add_i32_e32 v3, vcc, lds.f64@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_read_b64 v[1:2], v3
-; CI-NEXT:    ds_read_b64 v[3:4], v3 offset:2056
+; CI-NEXT:    ds_read_b64 v[1:2], v0
+; CI-NEXT:    ds_read_b64 v[3:4], v0 offset:2056
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -901,9 +876,8 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #
 ; GFX9-LABEL: simple_read2_f64_too_far:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT:    v_add_u32_e32 v2, lds.f64@abs32@lo, v4
-; GFX9-NEXT:    ds_read_b64 v[0:1], v2
-; GFX9-NEXT:    ds_read_b64 v[2:3], v2 offset:2056
+; GFX9-NEXT:    ds_read_b64 v[0:1], v4
+; GFX9-NEXT:    ds_read_b64 v[2:3], v4 offset:2056
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
@@ -971,7 +945,7 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl
 define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
 ; CI-LABEL: load_constant_adjacent_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -984,7 +958,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out
 ;
 ; GFX9-LABEL: load_constant_adjacent_offsets:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1003,7 +977,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out
 define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
 ; CI-LABEL: load_constant_disjoint_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[0:1], v0 offset1:2
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1016,7 +990,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
 ;
 ; GFX9-LABEL: load_constant_disjoint_offsets:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:2
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1037,7 +1011,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
 define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
 ; CI-LABEL: load_misaligned64_constant_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    v_mov_b32_e32 v2, bar@abs32@lo
+; CI-NEXT:    v_mov_b32_e32 v2, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[0:1], v2 offset1:1
 ; CI-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
@@ -1052,7 +1026,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)*
 ;
 ; GFX9-ALIGNED-LABEL: load_misaligned64_constant_offsets:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v2, bar@abs32@lo
+; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-ALIGNED-NEXT:    ds_read2_b32 v[0:1], v2 offset1:1
 ; GFX9-ALIGNED-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
 ; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1066,7 +1040,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)*
 ;
 ; GFX9-UNALIGNED-LABEL: load_misaligned64_constant_offsets:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, bar@abs32@lo
+; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-UNALIGNED-NEXT:    ds_read_b128 v[0:3], v0
 ; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1088,11 +1062,8 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)*
 define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
 ; CI-LABEL: load_misaligned64_constant_large_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_mov_b32 s4, bar.large@abs32@lo
-; CI-NEXT:    s_add_i32 s5, s4, 0x4000
-; CI-NEXT:    s_addk_i32 s4, 0x7ff8
-; CI-NEXT:    v_mov_b32_e32 v0, s5
-; CI-NEXT:    v_mov_b32_e32 v2, s4
+; CI-NEXT:    v_mov_b32_e32 v0, 0x4000
+; CI-NEXT:    v_mov_b32_e32 v2, 0x7ff8
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
 ; CI-NEXT:    ds_read2_b32 v[2:3], v2 offset1:1
@@ -1107,11 +1078,8 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac
 ;
 ; GFX9-LABEL: load_misaligned64_constant_large_offsets:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s2, bar.large@abs32@lo
-; GFX9-NEXT:    s_add_i32 s3, s2, 0x4000
-; GFX9-NEXT:    s_addk_i32 s2, 0x7ff8
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4000
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7ff8
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
 ; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset1:1
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1399,26 +1367,25 @@ bb:
 define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspace(3)* %arg) {
 ; CI-LABEL: ds_read_call_read:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; CI-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
+; CI-NEXT:    s_getpc_b64 s[40:41]
+; CI-NEXT:    s_mov_b32 s40, s0
+; CI-NEXT:    s_load_dwordx4 s[40:43], s[40:41], 0x0
 ; CI-NEXT:    s_load_dwordx2 s[36:37], s[0:1], 0x9
 ; CI-NEXT:    s_load_dword s0, s[0:1], 0xb
-; CI-NEXT:    s_mov_b32 s42, -1
-; CI-NEXT:    s_mov_b32 s43, 0xe8f000
-; CI-NEXT:    s_add_u32 s40, s40, s3
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    s_addc_u32 s41, s41, 0
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    s_mov_b32 s32, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_add_u32 s40, s40, s3
+; CI-NEXT:    s_addc_u32 s41, s41, 0
 ; CI-NEXT:    v_add_i32_e32 v40, vcc, s0, v0
 ; CI-NEXT:    s_getpc_b64 s[0:1]
 ; CI-NEXT:    s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4
 ; CI-NEXT:    s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12
-; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; CI-NEXT:    ds_read_b32 v41, v40
 ; CI-NEXT:    s_mov_b64 s[0:1], s[40:41]
 ; CI-NEXT:    s_mov_b64 s[2:3], s[42:43]
-; CI-NEXT:    s_mov_b32 s32, 0
 ; CI-NEXT:    s_mov_b32 s39, 0xf000
 ; CI-NEXT:    s_mov_b32 s38, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1431,24 +1398,23 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa
 ;
 ; GFX9-LABEL: ds_read_call_read:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
+; GFX9-NEXT:    s_getpc_b64 s[36:37]
+; GFX9-NEXT:    s_mov_b32 s36, s0
+; GFX9-NEXT:    s_load_dwordx4 s[36:39], s[36:37], 0x0
 ; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_add_u32 s36, s36, s3
 ; GFX9-NEXT:    s_addc_u32 s37, s37, 0
 ; GFX9-NEXT:    s_getpc_b64 s[0:1]
 ; GFX9-NEXT:    s_add_u32 s0, s0, void_func_void@gotpcrel32@lo+4
 ; GFX9-NEXT:    s_addc_u32 s1, s1, void_func_void@gotpcrel32@hi+12
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_lshl_add_u32 v40, v0, 2, s2
 ; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX9-NEXT:    ds_read_b32 v41, v40
 ; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    ds_read_b32 v0, v40 offset:4

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index ef1369b24350..8346ce90b183 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-ALIGNED %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
 
 @lds = addrspace(3) global [512 x float] undef, align 4
 @lds.f64 = addrspace(3) global [512 x double] undef, align 8
@@ -16,7 +16,6 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[0:3], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v1, v1 offset1:8
@@ -28,7 +27,6 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v1 offset1:8
 ; GFX9-NEXT:    s_endpgm
@@ -54,7 +52,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v2, v1 offset1:8
@@ -67,7 +64,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[0:1] offset:4
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:8
 ; GFX9-NEXT:    s_endpgm
@@ -98,7 +94,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(1)
 ; CI-NEXT:    ds_write_b32 v0, v2
@@ -113,7 +108,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    ds_write_b32 v0, v1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -146,7 +140,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(1)
 ; CI-NEXT:    ds_write_b32 v0, v2
@@ -161,7 +154,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    ds_write_b32 v0, v1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -197,7 +189,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa
 ; CI-NEXT:    buffer_load_dwordx2 v[3:4], v[1:2], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64 offset:8
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v3, v2 offset1:8
@@ -207,13 +198,12 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[1:2], v3, s[0:1]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[2:3], v3, s[0:1] offset:8
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v2, lds@abs32@lo
-; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v3 offset1:8
 ; GFX9-NEXT:    s_endpgm
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -242,7 +232,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)*
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dwordx2 v[1:2], v[1:2], s[0:3], 0 addr64
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:8
@@ -252,8 +241,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)*
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX9-NEXT:    v_mov_b32_e32 v3, lds@abs32@lo
-; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v3
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[1:2], v1, s[0:1]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -283,7 +271,6 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)*
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dwordx4 v[1:4], v[1:2], s[0:3], 0 addr64
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v1, v4 offset1:8
@@ -293,11 +280,10 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)*
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[1:4], v1, s[0:1]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v2, lds@abs32@lo
-; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v4 offset1:8
 ; GFX9-NEXT:    s_endpgm
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -324,7 +310,6 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[0:3], 0 addr64 offset:4
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v2, v1 offset1:255
@@ -337,7 +322,6 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[0:1] offset:4
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:255
 ; GFX9-NEXT:    s_endpgm
@@ -368,7 +352,6 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)*
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(1)
 ; CI-NEXT:    ds_write_b32 v0, v2
@@ -383,7 +366,6 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)*
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    ds_write_b32 v0, v1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -416,7 +398,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C,
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v2, v1 offset1:8
@@ -430,7 +411,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C,
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:8
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
@@ -474,7 +454,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs
 ; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    buffer_load_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b32 v0, v2, v1 offset0:3 offset1:8
@@ -488,7 +467,6 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[2:3]
-; GFX9-NEXT:    v_add_u32_e32 v0, lds@abs32@lo, v0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset0:3 offset1:8
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
@@ -588,7 +566,6 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dwordx2 v[1:2], v[0:1], s[0:3], 0 addr64
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b64 v0, v[1:2], v[1:2] offset1:8
@@ -600,7 +577,6 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
-; GFX9-NEXT:    v_add_u32_e32 v2, lds.f64@abs32@lo, v2
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b64 v2, v[0:1], v[0:1] offset1:8
 ; GFX9-NEXT:    s_endpgm
@@ -754,7 +730,6 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8
-; CI-NEXT:    v_add_i32_e32 v0, vcc, lds.f64@abs32@lo, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:8
@@ -767,7 +742,6 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v4, s[0:1]
 ; GFX9-NEXT:    global_load_dwordx2 v[2:3], v4, s[0:1] offset:8
-; GFX9-NEXT:    v_add_u32_e32 v4, lds.f64@abs32@lo, v4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ds_write2_b64 v4, v[0:1], v[2:3] offset1:8
 ; GFX9-NEXT:    s_endpgm
@@ -790,7 +764,7 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() {
 ; CI-LABEL: store_constant_adjacent_offsets:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_movk_i32 s0, 0x7b
-; CI-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    v_mov_b32_e32 v1, s0
 ; CI-NEXT:    v_mov_b32_e32 v2, s0
 ; CI-NEXT:    s_mov_b32 m0, -1
@@ -800,7 +774,7 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() {
 ; GFX9-LABEL: store_constant_adjacent_offsets:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_movk_i32 s0, 0x7b
-; GFX9-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
@@ -813,17 +787,17 @@ define amdgpu_kernel void @store_constant_adjacent_offsets() {
 define amdgpu_kernel void @store_constant_disjoint_offsets() {
 ; CI-LABEL: store_constant_disjoint_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
-; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
+; CI-NEXT:    v_mov_b32_e32 v0, 0x7b
+; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_write2_b32 v0, v1, v1 offset1:2
+; CI-NEXT:    ds_write2_b32 v1, v0, v0 offset1:2
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: store_constant_disjoint_offsets:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, foo@abs32@lo
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9-NEXT:    ds_write2_b32 v0, v1, v1 offset1:2
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    ds_write2_b32 v1, v0, v0 offset1:2
 ; GFX9-NEXT:    s_endpgm
   store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
   store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -835,21 +809,19 @@ define amdgpu_kernel void @store_constant_disjoint_offsets() {
 define amdgpu_kernel void @store_misaligned64_constant_offsets() {
 ; CI-LABEL: store_misaligned64_constant_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    v_mov_b32_e32 v0, bar@abs32@lo
+; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
-; CI-NEXT:    v_mov_b32_e32 v2, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
+; CI-NEXT:    ds_write2_b32 v0, v1, v0 offset1:1
+; CI-NEXT:    ds_write2_b32 v0, v1, v0 offset0:2 offset1:3
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX9-ALIGNED-LABEL: store_misaligned64_constant_offsets:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v0, bar@abs32@lo
+; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9-ALIGNED-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-ALIGNED-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; GFX9-ALIGNED-NEXT:    ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
+; GFX9-ALIGNED-NEXT:    ds_write2_b32 v0, v1, v0 offset1:1
+; GFX9-ALIGNED-NEXT:    ds_write2_b32 v0, v1, v0 offset0:2 offset1:3
 ; GFX9-ALIGNED-NEXT:    s_endpgm
 ;
 ; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets:
@@ -858,8 +830,7 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() {
 ; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v4, bar@abs32@lo
-; GFX9-UNALIGNED-NEXT:    ds_write_b128 v4, v[0:3]
+; GFX9-UNALIGNED-NEXT:    ds_write_b128 v1, v[0:3]
 ; GFX9-UNALIGNED-NEXT:    s_endpgm
   store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
   store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -871,28 +842,22 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() {
 define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
 ; CI-LABEL: store_misaligned64_constant_large_offsets:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_mov_b32 s0, bar.large@abs32@lo
-; CI-NEXT:    s_add_i32 s1, s0, 0x4000
-; CI-NEXT:    v_mov_b32_e32 v0, s1
+; CI-NEXT:    v_mov_b32_e32 v0, 0x4000
 ; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; CI-NEXT:    v_mov_b32_e32 v2, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_addk_i32 s0, 0x7ff8
 ; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    v_mov_b32_e32 v0, 0x7ff8
 ; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: store_misaligned64_constant_large_offsets:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s0, bar.large@abs32@lo
-; GFX9-NEXT:    s_add_i32 s1, s0, 0x4000
-; GFX9-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4000
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_addk_i32 s0, 0x7ff8
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7ff8
 ; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
 ; GFX9-NEXT:    s_endpgm
   store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4


        


More information about the llvm-commits mailing list