[llvm] 53f21e0 - TableGen/GlobalISel: Hack the operand order for atomic_store

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 11 07:22:52 PDT 2020


Author: Matt Arsenault
Date: 2020-08-11T10:22:44-04:00
New Revision: 53f21e0fb754799fc9a7ad7243a9161e2411b51d

URL: https://github.com/llvm/llvm-project/commit/53f21e0fb754799fc9a7ad7243a9161e2411b51d
DIFF: https://github.com/llvm/llvm-project/commit/53f21e0fb754799fc9a7ad7243a9161e2411b51d.diff

LOG: TableGen/GlobalISel: Hack the operand order for atomic_store

ISD::ATOMIC_STORE arbitrarily has the operands in the opposite order
from regular ISD::STORE, which always introduced an annoying
duplication of patterns to handle both cases. Since in GlobalISel
there's just the one G_STORE, we need to swap the operands to
correctly emit the type check for the pointer operand.

Some work started in 20aafa31569b5157e792daa8860d71dd0df8a53a to
migrate SelectionDAG to use ISD::STORE for atomics, but that work
seems to have stalled. Since this is pretty much the last
operation that matters and isn't yet supported for AMDGPU, use this
compatibility hack to unblock declaring it functionally complete.

Not sure what's going on with the pending_phis AArch64 test. It seems
it didn't always use atomics, and I'm not sure whether what it was
originally testing matters anymore.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir
    llvm/test/TableGen/GlobalISelEmitter-atomic_store.td

Modified: 
    llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/DSInstructions.td
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
    llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
    llvm/utils/TableGen/GlobalISelEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index e1ead56e2216..03fe1b21a339 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -180,6 +180,12 @@ def : GINodeEquiv<G_LOAD, atomic_load> {
   let CheckMMOIsAtomic = 1;
 }
 
+// Operands are swapped for atomic_store vs. regular store
+def : GINodeEquiv<G_STORE, atomic_store> {
+  let CheckMMOIsNonAtomic = 0;
+  let CheckMMOIsAtomic = 1;
+}
+
 def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>;
 def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap>;
 def : GINodeEquiv<G_ATOMICRMW_ADD, atomic_load_add>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index e5458f21e002..fc11255594de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -138,6 +138,9 @@ def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
   bit CheckMMOIsAtomic = 1;
 }
 
+def : GINodeEquiv<G_STORE, AMDGPUatomic_st_glue> {
+  bit CheckMMOIsAtomic = 1;
+}
 
 
 def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>;

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index beb01b1abf0f..03c3faf0797b 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -719,7 +719,7 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
 // normal store.
 class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
   (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
-  (inst $ptr, $value, offset:$offset, (i1 0))
+  (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 0))
 >;
 
 multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index f555856f34e5..0b3371501ef8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -32,28 +32,6 @@ define i128 @ABIi128(i128 %arg1) {
   ret i128 %res
 }
 
-  ; The key problem here is that we may fail to create an MBB referenced by a
-  ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
-  ; happen.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %6:gpr(s32), %2:gpr(p0) :: (store seq_cst 4 into %ir.addr) (in function: pending_phis)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis
-; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis:
-define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) {
-  br i1 %tst, label %true, label %false
-
-end:
-  %res = phi i32 [%val, %true], [42, %false]
-  ret i32 %res
-
-true:
-  store atomic i32 42, i32* %addr seq_cst, align 4
-  br label %end
-
-false:
-  br label %end
-
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
@@ -72,16 +50,6 @@ define i128 @sequence_sizes([8 x i8] %in) {
   ret i128 undef
 }
 
-; Just to make sure we don't accidentally emit a normal load/store.
-; FALLBACK-WITH-REPORT-ERR: cannot select: G_STORE %1:gpr(s64), %0:gpr64sp(p0) :: (store unordered 8 into %ir.addr) (in function: atomic_ops)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops
-; FALLBACK-WITH-REPORT-LABEL: atomic_ops:
-define i64 @atomic_ops(i64* %addr) {
-  store atomic i64 0, i64* %addr unordered, align 8
-  %res = load atomic i64, i64* %addr seq_cst, align 8
-  ret i64 %res
-}
-
 ; Make sure we don't mess up metadata arguments.
 declare void @llvm.write_register.i64(metadata, i64)
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
new file mode 100644
index 000000000000..379936c640e7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir
@@ -0,0 +1,272 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+
+name: atomic_store_flat_s32_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1_vgpr2
+
+    ; GFX7-LABEL: name: atomic_store_flat_s32_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
+    ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p0) = COPY $vgpr1_vgpr2
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_v2s16_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1_vgpr2
+
+    ; GFX7-LABEL: name: atomic_store_flat_v2s16_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4)
+    ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(p0) = COPY $vgpr1_vgpr2
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_p3_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1_vgpr2
+
+    ; GFX7-LABEL: name: atomic_store_flat_p3_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4)
+    ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4)
+    %0:vgpr(p3) = COPY $vgpr0
+    %1:vgpr(p0) = COPY $vgpr1_vgpr2
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_p5_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1_vgpr2
+
+    ; GFX7-LABEL: name: atomic_store_flat_p5_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4)
+    ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p0) = COPY $vgpr1_vgpr2
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_p6_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1_vgpr2
+
+    ; GFX7-LABEL: name: atomic_store_flat_p6_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4)
+    ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2
+    ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4)
+    %0:vgpr(p6) = COPY $vgpr0
+    %1:vgpr(p0) = COPY $vgpr1_vgpr2
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_s64_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: atomic_store_flat_s64_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
+    ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(p0) = COPY $vgpr2_vgpr3
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_v2s32_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8)
+    ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8)
+    %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:vgpr(p0) = COPY $vgpr2_vgpr3
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_v4s16_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: atomic_store_flat_v4s16_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8)
+    ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8)
+    %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:vgpr(p0) = COPY $vgpr2_vgpr3
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: atomic_store_flat_p0_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: atomic_store_flat_p0_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8)
+    ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8)
+    %0:vgpr(p0) = COPY $vgpr0_vgpr1
+    %1:vgpr(p0) = COPY $vgpr2_vgpr3
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
+
+...
+---
+
+name: atomic_store_flat_p1_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: atomic_store_flat_p1_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8)
+    ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
+    ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p0) = COPY $vgpr2_vgpr3
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir
new file mode 100644
index 000000000000..aeb62a4a05f3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir
@@ -0,0 +1,343 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+
+name: atomic_store_local_s32_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: atomic_store_local_s32_seq_cst
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 4, addrspace 3)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p3) = COPY $vgpr1
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_v2s16_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: atomic_store_local_v2s16_seq_cst
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_v2s16_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_v2s16_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(p3) = COPY $vgpr1
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_p3_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: atomic_store_local_p3_seq_cst
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_p3_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_p3_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    %0:vgpr(p3) = COPY $vgpr0
+    %1:vgpr(p3) = COPY $vgpr1
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_p5_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: atomic_store_local_p5_seq_cst
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_p5_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_p5_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p3) = COPY $vgpr1
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_p6_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: atomic_store_local_p6_seq_cst
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_p6_seq_cst
+    ; GFX7: liveins: $vgpr0, $vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_p6_seq_cst
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3)
+    %0:vgpr(p6) = COPY $vgpr0
+    %1:vgpr(p3) = COPY $vgpr1
+    G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_s64_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_store_local_s64_seq_cst
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_s64_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 8, addrspace 3)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_v2s32_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX6-LABEL: name: atomic_store_local_v2s32_seq_cst
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_v2s32_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_v2s32_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_v4s16_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_store_local_v4s16_seq_cst
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_v4s16_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_v4s16_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3)
+
+...
+
+---
+
+name: atomic_store_local_p0_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_store_local_p0_seq_cst
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_p0_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_p0_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    %0:vgpr(p0) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3)
+
+...
+---
+
+name: atomic_store_local_p1_seq_cst
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX6-LABEL: name: atomic_store_local_p1_seq_cst
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX7-LABEL: name: atomic_store_local_p1_seq_cst
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    ; GFX9-LABEL: name: atomic_store_local_p1_seq_cst
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3)
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index f961ba3b6549..38b60d6a0a49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -688,24 +688,25 @@ body: |
 
     ; GFX7-LABEL: name: store_atomic_flat_s32
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX7: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4)
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
     ; GFX8-LABEL: name: store_atomic_flat_s32
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX8: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4)
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
     ; GFX9-LABEL: name: store_atomic_flat_s32
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4)
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
     ; GFX10-LABEL: name: store_atomic_flat_s32
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX10: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4)
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0)
@@ -725,24 +726,25 @@ body: |
 
     ; GFX7-LABEL: name: store_atomic_flat_s64
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX7: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8)
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
     ; GFX8-LABEL: name: store_atomic_flat_s64
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8)
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
     ; GFX9-LABEL: name: store_atomic_flat_s64
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8)
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
     ; GFX10-LABEL: name: store_atomic_flat_s64
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX10: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8)
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
index 814a051cbc7d..8713d92987b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -786,29 +786,30 @@ body: |
     ; GFX6: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
     ; GFX7-LABEL: name: store_atomic_global_s32
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX7: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
     ; GFX7-FLAT-LABEL: name: store_atomic_global_s32
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX7-FLAT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
+    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
     ; GFX8-LABEL: name: store_atomic_global_s32
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX8: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
     ; GFX9-LABEL: name: store_atomic_global_s32
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
     ; GFX10-LABEL: name: store_atomic_global_s32
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; GFX10: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1)
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1)
@@ -833,29 +834,30 @@ body: |
     ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
     ; GFX7-LABEL: name: store_atomic_global_s64
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX7: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
     ; GFX7-FLAT-LABEL: name: store_atomic_global_s64
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX7-FLAT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
+    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
     ; GFX8-LABEL: name: store_atomic_global_s64
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
     ; GFX9-LABEL: name: store_atomic_global_s64
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
     ; GFX10-LABEL: name: store_atomic_global_s64
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
-    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
-    ; GFX10: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1)
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1)

diff  --git a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
index 9ae6c8be7dc2..b5624ac80e59 100644
--- a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
+++ b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
@@ -14,6 +14,7 @@ def FPR32 : RegisterClass<"MyTarget", [f32], 32, (add F0)>;
 def FPR32Op : RegisterOperand<FPR32>;
 def B0 : Register<"b0"> { let Namespace = "MyTarget"; }
 def GPR8 : RegisterClass<"MyTarget", [i8], 8, (add B0)>;
+def GPR8Op : RegisterOperand<GPR8>;
 
 def p0 : PtrValueType <i32, 0>;
 

diff  --git a/llvm/test/TableGen/GlobalISelEmitter-atomic_store.td b/llvm/test/TableGen/GlobalISelEmitter-atomic_store.td
new file mode 100644
index 000000000000..4bcd6ed927e3
--- /dev/null
+++ b/llvm/test/TableGen/GlobalISelEmitter-atomic_store.td
@@ -0,0 +1,24 @@
+// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common %s -o - < %s | FileCheck -check-prefix=GISEL %s
+
+include "llvm/Target/Target.td"
+include "GlobalISelEmitterCommon.td"
+
+def ST_ATOM_B32 : I<(outs), (ins GPR32Op:$val, GPR32Op:$ptr), []>;
+
+// Check that the pattern for atomic_store inverts the operands to
+// match the order of G_STORE.
+
+// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/1,
+// GISEL-NEXT: GIM_CheckAtomicOrderingOrStrongerThan, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::Unordered,
+// GISEL-NEXT: // MIs[0] ptr
+// GISEL-NEXT: GIM_CheckPointerToAny, /*MI*/0, /*Op*/1, /*SizeInBits*/0,
+// GISEL-NEXT: // MIs[0] val
+// GISEL-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32,
+// GISEL-NEXT: // (atomic_store iPTR:{ *:[iPTR] }:$ptr, i32:{ *:[i32] }:$val)<<P:Predicate_atomic_store_8>>  =>  (ST_ATOM_B32 GPR32Op:{ *:[i32] }:$val, GPR32Op:{ *:[i32] }:$ptr)
+// GISEL-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/MyTarget::ST_ATOM_B32,
+def : Pat<
+//  (atomic_store_8 iPTR:$ptr, i32:$val),
+  (atomic_store_8 iPTR:$ptr, i32:$val),
+  (ST_ATOM_B32 GPR32Op:$val, GPR32Op:$ptr)
+>;

diff  --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 52fe9b2af2f0..b9c88b517388 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3766,9 +3766,12 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
     return failedImport("Src pattern child has predicate (" +
                         explainPredicates(Src) + ")");
   }
+
+  bool IsAtomic = false;
   if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
     InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
   else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
+    IsAtomic = true;
     InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
       "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
   }
@@ -3822,6 +3825,27 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       }
     }
 
+    // Hack around an unfortunate mistake in how atomic store (and really
+    // atomicrmw in general) operands were ordered. A ISD::STORE used the order
+    // <stored value>, <pointer> order. ISD::ATOMIC_STORE used the opposite,
+    // <pointer>, <stored value>. In GlobalISel there's just the one store
+    // opcode, so we need to swap the operands here to get the right type check.
+    if (IsAtomic && SrcGIOrNull->TheDef->getName() == "G_STORE") {
+      assert(NumChildren == 2 && "wrong operands for atomic store");
+
+      TreePatternNode *PtrChild = Src->getChild(0);
+      TreePatternNode *ValueChild = Src->getChild(1);
+
+      if (auto Error = importChildMatcher(Rule, InsnMatcher, PtrChild, true,
+                                          false, 1, TempOpIdx))
+        return std::move(Error);
+
+      if (auto Error = importChildMatcher(Rule, InsnMatcher, ValueChild, false,
+                                          false, 0, TempOpIdx))
+        return std::move(Error);
+      return InsnMatcher;
+    }
+
     // Match the used operands (i.e. the children of the operator).
     bool IsIntrinsic =
         SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||


        


More information about the llvm-commits mailing list