[llvm] d869921 - [AMDGPU] Add patterns for i8/i16 local atomic load/store
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 18 02:25:08 PDT 2021
Author: Piotr Sobczak
Date: 2021-10-18T11:23:10+02:00
New Revision: d869921004526e804f344936d671a994852ab4d2
URL: https://github.com/llvm/llvm-project/commit/d869921004526e804f344936d671a994852ab4d2
DIFF: https://github.com/llvm/llvm-project/commit/d869921004526e804f344936d671a994852ab4d2.diff
LOG: [AMDGPU] Add patterns for i8/i16 local atomic load/store
Add patterns for i8/i16 local atomic load/store.
Add tests for the new patterns.
Copy atomic_load_local.ll and atomic_store_local.ll to the GlobalISel directory.
Differential Revision: https://reviews.llvm.org/D111869
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 119c4089d6c2e..da9665a7f6372 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -422,6 +422,16 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
let MemoryVT = i16;
}
+def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i32;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 003617af985b7..104b5160b9850 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -714,6 +714,10 @@ foreach vt = Reg32Types.types in {
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
}
+defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
@@ -774,6 +778,10 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8c29437a9f119..8c24268e379ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -333,6 +333,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
+def atomic_load_8_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
@@ -423,6 +435,14 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
} // End IsLoad = 1
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_8_glue node:$ptr)> {
+ let MemoryVT = i8;
+}
+def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_16_glue node:$ptr)> {
+ let MemoryVT = i16;
+}
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_32_glue node:$ptr)> {
let MemoryVT = i32;
@@ -509,6 +529,18 @@ def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
let AddressSpaces = StoreAddress_local.AddrSpaces in {
+def atomic_store_local_8_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+def atomic_store_local_16_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
def atomic_store_local_32_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
new file mode 100644
index 0000000000000..1054bdcafd423
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
@@ -0,0 +1,154 @@
+; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
+ %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
+ ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+ %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
+ ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
+ %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
+ ret i16 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+ %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
+ ret i16 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
+ %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+ %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
+ %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
+ ret i64 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
+ %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
+ ret i64 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
+ %load = load atomic float, float addrspace(3)* %gep monotonic, align 4
+ ret float %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
+ %load = load atomic double, double addrspace(3)* %gep monotonic, align 8
+ ret double %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
+ %load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
+ ret i8* %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
+ %load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
+ ret i8 addrspace(3)* %load
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll
new file mode 100644
index 0000000000000..63187c24c6333
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll
@@ -0,0 +1,103 @@
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
+ store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
+ %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+ store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
+ store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
+ %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+ store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
+ store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
+ %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+ store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
+ store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
+ %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+ store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index b8c3fed5d257f..4534767353daa 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -1,6 +1,56 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
+ %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
+ ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+ %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
+ ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
+ %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
+ ret i16 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+ %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
+ ret i16 %load
+}
+
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
index 3b69070f3eaec..4e80f5d05b796 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -1,6 +1,56 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
+ store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
+ %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+ store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
+ store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
+ %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+ store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
+ ret void
+}
+
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
More information about the llvm-commits mailing list