[llvm] f707e12 - [AMDGPU] Select d16 stores even when sramecc is enabled

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 19 01:35:21 PDT 2022


Author: Jay Foad
Date: 2022-04-19T09:34:32+01:00
New Revision: f707e1255e2f0a46c7a23271f594a9a4e5ec8f08

URL: https://github.com/llvm/llvm-project/commit/f707e1255e2f0a46c7a23271f594a9a4e5ec8f08
DIFF: https://github.com/llvm/llvm-project/commit/f707e1255e2f0a46c7a23271f594a9a4e5ec8f08.diff

LOG: [AMDGPU] Select d16 stores even when sramecc is enabled

The sramecc feature changes the behaviour of d16 loads so they do not
preserve the unused 16 bits of the result register, but it has no impact
on d16 stores, so we should make use of them even when the feature is
enabled.

Differential Revision: https://reviews.llvm.org/D104912

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/BUFInstructions.td
    llvm/lib/Target/AMDGPU/DSInstructions.td
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/test/CodeGen/AMDGPU/store-hi16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 133ee4742e5c4..4e7efef3f9b15 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1851,7 +1851,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OF
 defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
 
 
-let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
+let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in {
  // Hiding the extract high pattern in the PatFrag seems to not
  // automatically increase the complexity.
 let AddedComplexity = 1 in {

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 4e7a9b5a65cd8..4d78e3dae2ec8 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -791,7 +791,7 @@ defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
 defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
 defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
 
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
 def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
 def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
 }

diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7ea39d5c51dd2..3f0c42578a11c 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1169,10 +1169,12 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
 def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
 
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
 def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
 def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
+}
 
+let OtherPredicates = [D16PreservesUnusedBits] in {
 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
 def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
 def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
@@ -1363,10 +1365,12 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
 
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
 defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
 defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
+}
 
+let OtherPredicates = [D16PreservesUnusedBits] in {
 defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
 defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
 defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
@@ -1489,10 +1493,12 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
 defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
 defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;
 
-let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
+let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
 defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
 defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;
+}
 
+let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
 defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
 defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
 defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;

diff  --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
index dd32021532f54..e4699f3f926fe 100644
--- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
@@ -1,16 +1,15 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900,GFX9,GFX900-MUBUF %s
-; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,GFX9,NO-D16-HI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-MUBUF %s
+; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900,GFX9,GFX900-FLATSCR %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s
 
 ; GCN-LABEL: {{^}}store_global_hi_v2i16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-NEXT: flat_store_short v[0:1], v2
-; GFX906-NEXT: global_store_short v[0:1], v2, off
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -26,11 +25,10 @@ entry:
 ; GCN-LABEL: {{^}}store_global_hi_v2f16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-NEXT: flat_store_short v[0:1], v2
-; GFX906-NEXT: global_store_short v[0:1], v2, off
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -46,11 +44,10 @@ entry:
 ; GCN-LABEL: {{^}}store_global_hi_i32_shift:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-NEXT: flat_store_short v[0:1], v2
-; GFX906-NEXT: global_store_short v[0:1], v2, off
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -65,11 +62,10 @@ entry:
 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-NEXT: flat_store_byte v[0:1], v2
-; GFX906-NEXT: global_store_byte v[0:1], v2, off
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -85,11 +81,10 @@ entry:
 ; GCN-LABEL: {{^}}store_global_hi_i8_shift:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-NEXT: flat_store_byte v[0:1], v2
-; GFX906-NEXT: global_store_byte v[0:1], v2, off
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -103,16 +98,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
 
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803: flat_store_short v[0:1], v2{{$}}
 
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: global_store_short v[0:1], v2, off
-
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
@@ -127,16 +119,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
 
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803: flat_store_short v[0:1], v{{[0-9]$}}
 
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: global_store_short v[0:1], v2, off
-
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
@@ -150,16 +139,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
 
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
 
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: global_store_byte v[0:1], v2, off
-
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
@@ -174,16 +160,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
 
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
 
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: global_store_byte v[0:1], v2, off
-
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
@@ -199,7 +182,7 @@ entry:
 ; GCN-LABEL: {{^}}store_flat_hi_v2i16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
@@ -217,7 +200,7 @@ entry:
 ; GCN-LABEL: {{^}}store_flat_hi_v2f16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
@@ -235,7 +218,7 @@ entry:
 ; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
@@ -253,7 +236,7 @@ entry:
 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
@@ -272,7 +255,7 @@ entry:
 ; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
@@ -289,10 +272,7 @@ entry:
 
 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
-
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: flat_store_short v[0:1], v2 offset:4094
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
 
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
@@ -318,10 +298,7 @@ entry:
 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff802, v
 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v
 
-; GFX906-DAG: v_lshrrev_b32_e32
-; GFX906: flat_store_short v[0:1], v2{{$}}
-
-; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
 ; GFX803: flat_store_short v[0:1], v2{{$}}
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -336,16 +313,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
 
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803-DAG: v_add_u32_e32
 ; GFX803-DAG: v_addc_u32_e32
 ; GFX803: flat_store_byte v[0:1], v2{{$}}
 
-; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906-NEXT: flat_store_byte v[0:1], v2 offset:4095{{$}}
-
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
@@ -367,10 +341,7 @@ entry:
 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff001, v
 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc
 
-; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
-
-; GFX906-DAG: v_lshrrev_b32_e32 v2, 16, v2
-; GFX906: flat_store_byte v[0:1], v2{{$}}
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
 
 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
 ; GFX803: flat_store_byte v[0:1], v2{{$}}
@@ -390,8 +361,8 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off
+; GFX9-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
+; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
@@ -410,8 +381,8 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2f16:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
+; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
@@ -430,8 +401,8 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_i32_shift:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
+; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
@@ -449,8 +420,8 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
-; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
+; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen{{$}}
@@ -469,8 +440,8 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_i8_shift:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
-; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
+; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen{{$}}
@@ -487,8 +458,8 @@ entry:
 
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
 ; GCN: s_waitcnt
-; GFX900-MUBUF:   buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
-; GFX900-FLATSCR: scratch_store_short_d16_hi off, v0, s32 offset:4094{{$}}
+; GFX9-MUBUF:   buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
+; GFX9-FLATSCR: scratch_store_short_d16_hi off, v0, s32 offset:4094{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s32 offset:4094{{$}}
@@ -509,9 +480,9 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_short_d16_hi v0, off, s[0:3], 0{{$}}
-; GFX900-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, [[SOFF]]{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_short_d16_hi v0, off, s[0:3], 0{{$}}
+; GFX9-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
+; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, [[SOFF]]{{$}}
 
 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], 0{{$}}
@@ -531,9 +502,9 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
 ; GCN: s_waitcnt
 
-; GFX900-MUBUF-NEXT:   buffer_store_byte_d16_hi v0, off, s[0:3], 0{{$}}
-; GFX900-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
-; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, [[SOFF]]{{$}}
+; GFX9-MUBUF-NEXT:   buffer_store_byte_d16_hi v0, off, s[0:3], 0{{$}}
+; GFX9-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
+; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, [[SOFF]]{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
 ; NO-D16-HI: buffer_store_byte v0, off, s[0:3], 0{{$}}
@@ -552,7 +523,7 @@ entry:
 ; GCN-LABEL: {{^}}store_local_hi_v2i16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: ds_write_b16 v0, v1
@@ -571,7 +542,7 @@ entry:
 ; GCN-LABEL: {{^}}store_local_hi_v2f16:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: ds_write_b16 v0, v1
@@ -590,7 +561,7 @@ entry:
 ; GCN-LABEL: {{^}}store_local_hi_i32_shift:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: ds_write_b16 v0, v1
@@ -608,7 +579,7 @@ entry:
 ; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
 ; GCN: s_waitcnt
 
-; GFX900-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: ds_write_b8 v0, v1
@@ -626,7 +597,7 @@ entry:
 
 ; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
 ; GCN: s_waitcnt
-; GFX900-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
 ; NO-D16-HI: ds_write_b16 v0, v1 offset:65534{{$}}
@@ -645,14 +616,14 @@ entry:
 
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
 ; GCN: s_waitcnt
-; GFX900-MUBUF:        buffer_store_dword
-; GFX900-MUBUF-NEXT:   s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT:   buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4058
-; GFX900-MUBUF-NEXT:   s_waitcnt vmcnt(0)
-; GFX900-FLATSCR:      scratch_store_dword
-; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4058
-; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF:        buffer_store_dword
+; GFX9-MUBUF-NEXT:   s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:   buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4058
+; GFX9-MUBUF-NEXT:   s_waitcnt vmcnt(0)
+; GFX9-FLATSCR:      scratch_store_dword
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4058
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 define void @store_private_hi_v2i16_to_offset(i32 %arg, [10 x i32] addrspace(5)* %obj0) #0 {
 entry:
   %obj1 = alloca [4096 x i16], align 2, addrspace(5)
@@ -667,13 +638,13 @@ entry:
 
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
 ; GCN: s_waitcnt
-; GFX900-MUBUF:        buffer_store_dword
-; GFX900-MUBUF-NEXT:   s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT:   buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4059
-; GFX900-FLATSCR:      scratch_store_dword
-; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4059
-; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF:        buffer_store_dword
+; GFX9-MUBUF-NEXT:   s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:   buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4059
+; GFX9-FLATSCR:      scratch_store_dword
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4059
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
 define void @store_private_hi_v2i16_i8_to_offset(i32 %arg, [10 x i32] addrspace(5)* %obj0) #0 {
 entry:
   %obj1 = alloca [4096 x i8], align 2, addrspace(5)


        


More information about the llvm-commits mailing list