[llvm] [AMDGPU] Support f64 atomics on gfx1250 (PR #151172)

Tue Jul 29 08:30:24 PDT 2025

https://github.com/changpeng created https://github.com/llvm/llvm-project/pull/151172

   - BUF/FLAT/GLOBAL_ADD/MIN/MAX_F64
   - DS_ADD_F64

>From ba51ef50de89d8fe8aa16289314f0ad1e4c08a2d Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Tue, 29 Jul 2025 00:16:00 -0700
Subject: [PATCH] [AMDGPU] Support f64 atomics on gfx1250

   - BUF/FLAT/GLOBAL_ADD/MIN/MAX_F64
   - DS_ADD_F64

Co-authored-by: Konstantin Zhuravlyov <Konstantin Zhuravlyov at amd.com>
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   2 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   2 +-
 llvm/lib/Target/AMDGPU/BUFInstructions.td     |  12 +-
 llvm/lib/Target/AMDGPU/DSInstructions.td      |   3 +
 llvm/lib/Target/AMDGPU/FLATInstructions.td    |   8 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   4 +-
 .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll  | 790 +++++++++++++++++
 .../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll     | 799 ++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_ds.s          |  78 ++
 .../MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s     | 300 +++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s       | 211 +++++
 .../Disassembler/AMDGPU/gfx1250_dasm_ds.txt   |  33 +
 .../AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt     |  90 ++
 .../AMDGPU/gfx1250_dasm_vflat.txt             |  69 ++
 14 files changed, 2397 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a17fb934ff66d..25e1eabb2c293 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2429,7 +2429,7 @@ def HasAtomicFMinFMaxF64FlatInsts :
 
 def HasLdsAtomicAddF64 :
   Predicate<"Subtarget->hasLdsAtomicAddF64()">,
-  AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
+  AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX1250Insts)>;
 
 def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
   AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fedfa3f9dd900..f16351fac9e2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1682,7 +1682,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   if (ST.hasFlatAtomicFaddF32Inst())
     Atomic.legalFor({{S32, FlatPtr}});
 
-  if (ST.hasGFX90AInsts()) {
+  if (ST.hasGFX90AInsts() || ST.hasGFX1250Insts()) {
     // These are legal with some caveats, and should have undergone expansion in
     // the IR in most situations
     // TODO: Move atomic expansion into legalizer
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index f99e71637f70f..1956a15c57d67 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2489,7 +2489,7 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
 }
 
 //===----------------------------------------------------------------------===//
-// MUBUF - GFX11, GFX12.
+// MUBUF - GFX11, GFX12, GFX1250.
 //===----------------------------------------------------------------------===//
 
 // gfx11 instruction that accept both old and new assembler name.
@@ -2600,6 +2600,12 @@ multiclass MUBUF_Real_Atomic_gfx11_gfx12<bits<8> op,
     def : Mnem_gfx12<gfx11_name, gfx12_name>;
 }
 
+multiclass MUBUF_Real_Atomic_gfx12_Renamed<bits<8> op, string real_name> :
+  MUBUF_Real_Atomic_gfx12_impl<op, 0, real_name>,
+  MUBUF_Real_Atomic_gfx12_impl<op, 1, real_name> {
+  def : Mnem_gfx12<get_BUF_ps<NAME>.Mnemonic, real_name>;
+}
+
 defm BUFFER_GL0_INV               : MUBUF_Real_gfx11<0x02B>;
 defm BUFFER_GL1_INV               : MUBUF_Real_gfx11<0x02C>;
 
@@ -2678,6 +2684,10 @@ defm BUFFER_ATOMIC_XOR_X2         : MUBUF_Real_Atomic_gfx11_gfx12<0x04B, "buffer
 defm BUFFER_ATOMIC_PK_ADD_F16     : MUBUF_Real_Atomic_gfx12<0x059>;
 defm BUFFER_ATOMIC_PK_ADD_BF16    : MUBUF_Real_Atomic_gfx12<0x05a>;
 
+defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_gfx12<0x055>;
+defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05b, "buffer_atomic_min_num_f64">;
+defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05c, "buffer_atomic_max_num_f64">;
+
 //===----------------------------------------------------------------------===//
 // MUBUF - GFX10.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 319cc9d1da181..3ff675d6e5e97 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1397,6 +1397,9 @@ defm DS_BVH_STACK_RTN_B32             : DS_Real_gfx12<0x0e0,
 defm DS_BVH_STACK_PUSH8_POP1_RTN_B32  : DS_Real_gfx12<0x0e1>;
 defm DS_BVH_STACK_PUSH8_POP2_RTN_B64  : DS_Real_gfx12<0x0e2>;
 
+defm DS_ADD_F64     : DS_Real_gfx12<0x054>;
+defm DS_ADD_RTN_F64 : DS_Real_gfx12<0x074>;
+
 let AssemblerPredicate = HasLdsBarrierArriveAtomic in {
 defm DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 : DS_Real_gfx12<0x056>;
 defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64   : DS_Real_gfx12<0x075>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8ede9caead8bc..d5d1074622135 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -3488,6 +3488,14 @@ defm GLOBAL_LOAD_TR_B64_w32           : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
 defm GLOBAL_LOAD_TR4_B64              : VFLAT_Real_AllAddr_gfx1250<0x073>;
 defm GLOBAL_LOAD_TR6_B96              : VFLAT_Real_AllAddr_gfx1250<0x074>;
 
+defm FLAT_ATOMIC_ADD_F64              : VFLAT_Real_Atomics_gfx1250<0x055>;
+defm FLAT_ATOMIC_MIN_F64              : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
+defm FLAT_ATOMIC_MAX_F64              : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;
+
+defm GLOBAL_ATOMIC_ADD_F64            : VFLAT_Real_Atomics_gfx1250<0x055>;
+defm GLOBAL_ATOMIC_MIN_F64            : VFLAT_Real_Atomics_gfx1250<0x05b, "global_atomic_min_num_f64">;
+defm GLOBAL_ATOMIC_MAX_F64            : VFLAT_Real_Atomics_gfx1250<0x05c, "global_atomic_max_num_f64">;
+
 def True16D16Table : GenericTable {
   let FilterClass = "True16D16Table";
   let CppTypeName = "True16D16Info";
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 10ded0e1d1c3a..5357a375ae5a9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -715,7 +715,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
 
   // DS_ADD_F64/DS_ADD_RTN_F64
-  bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
+  bool hasLdsAtomicAddF64() const {
+    return hasGFX90AInsts() || hasGFX1250Insts();
+  }
 
   bool hasMultiDwordFlatScratchAddressing() const {
     return getGeneration() >= GFX9;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 0e132f130c844..2785b78da99e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefix=GFX90A
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX1250
 
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -37,6 +38,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -56,6 +68,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -92,6 +111,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -120,6 +157,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -139,6 +187,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -175,6 +230,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -203,6 +276,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -222,6 +306,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -258,6 +349,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -286,6 +394,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -305,6 +424,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -341,6 +467,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -369,6 +512,17 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -388,6 +542,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -424,6 +585,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -452,6 +631,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -471,6 +661,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -507,6 +704,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -535,6 +750,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -554,6 +780,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -590,6 +823,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -618,6 +868,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -637,6 +898,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -673,6 +941,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -701,6 +986,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -720,6 +1016,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -756,6 +1059,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -784,6 +1105,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -803,6 +1135,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -839,6 +1178,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -867,6 +1224,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -886,6 +1254,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -922,6 +1297,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -950,6 +1342,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -969,6 +1372,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -1005,6 +1415,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -1056,6 +1483,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:  .LBB36_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB36_2
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:  .LBB36_2:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1104,6 +1555,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:  .LBB37_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB37_2
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:  .LBB37_2:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1154,6 +1627,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:  .LBB38_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB38_2
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:  .LBB38_2:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1202,6 +1699,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:  .LBB39_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB39_2
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:  .LBB39_2:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1229,6 +1748,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1254,6 +1786,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1281,6 +1825,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1329,6 +1886,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:  .LBB43_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB43_2
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:  .LBB43_2:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1360,6 +1939,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1389,6 +1981,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1420,6 +2023,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1447,6 +2063,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1472,6 +2101,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1501,6 +2142,19 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1530,6 +2184,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1575,6 +2240,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:  .LBB51_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB51_3
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT:    s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, s1
+; GFX1250-NEXT:    ds_load_b64 v[2:3], v4
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:  .LBB51_2: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB51_2
+; GFX1250-NEXT:  .LBB51_3:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1620,6 +2319,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:  .LBB52_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB52_3
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT:    s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, s1
+; GFX1250-NEXT:    ds_load_b64 v[2:3], v4
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:  .LBB52_2: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB52_2
+; GFX1250-NEXT:  .LBB52_3:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1665,6 +2398,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:  .LBB53_2:
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT:    s_cbranch_execz .LBB53_3
+; GFX1250-NEXT:  ; %bb.1:
+; GFX1250-NEXT:    s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT:    s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, s1
+; GFX1250-NEXT:    ds_load_b64 v[2:3], v4
+; GFX1250-NEXT:    v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT:  .LBB53_2: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB53_2
+; GFX1250-NEXT:  .LBB53_3:
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1687,6 +2454,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
 ; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, v0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[0:1], 4.0, v[4:5]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[4:5]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
   ret double %ret
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 6067194d947fa..f9a24fee59692 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX942
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX1250
 
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -38,6 +39,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -57,6 +69,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -93,6 +112,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -121,6 +158,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -140,6 +188,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -176,6 +231,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -204,6 +277,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -223,6 +307,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -259,6 +350,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -287,6 +395,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -306,6 +425,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -342,6 +468,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -370,6 +513,17 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -389,6 +543,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -425,6 +586,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -453,6 +632,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -472,6 +662,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -508,6 +705,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -536,6 +751,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -555,6 +781,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -591,6 +824,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -619,6 +869,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -638,6 +899,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -674,6 +942,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -702,6 +987,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -721,6 +1017,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -757,6 +1060,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -785,6 +1106,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   ret void
@@ -804,6 +1136,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
   store double %ret, ptr poison
@@ -840,6 +1179,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_mov_b32 s6, 4
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -868,6 +1225,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -887,6 +1255,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -923,6 +1298,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -951,6 +1343,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX942-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   ret void
@@ -970,6 +1373,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
   store double %ret, ptr poison
@@ -1006,6 +1416,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    global_store_dwordx2 v0, v[2:3], s[8:9]
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s10
+; GFX1250-NEXT:    buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
   store double %ret, ptr addrspace(1) %out, align 8
@@ -1038,6 +1465,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1067,6 +1507,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1098,6 +1549,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1127,6 +1591,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1154,6 +1629,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1179,6 +1667,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1206,6 +1706,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1246,6 +1759,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
   ret void
@@ -1277,6 +1801,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1306,6 +1843,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1337,6 +1885,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1364,6 +1925,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1389,6 +1963,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1418,6 +2004,19 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
 ; GFX942-NEXT:    buffer_inv sc0 sc1
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT:    global_wb scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_storecnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_SYS
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1458,6 +2057,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT:    s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT:    global_inv scope:SCOPE_DEV
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1
   ret void
@@ -1485,6 +2095,31 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do
 ; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    s_load_b32 s2, s[4:5], 0x24
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x2c
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s2
+; GFX1250-NEXT:    s_mov_b32 s2, 0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:  .LBB51_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[4:5], s[0:1], v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s2, vcc_lo, s2
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s2
+; GFX1250-NEXT:    s_cbranch_execnz .LBB51_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret void
@@ -1508,6 +2143,30 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
 ; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, v1
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
@@ -1534,6 +2193,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
 ; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:  .LBB53_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB53_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1560,6 +2242,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
 ; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:  .LBB54_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB54_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret void
@@ -1586,6 +2291,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
 ; GFX942-NEXT:    ds_add_f64 v2, v[0:1]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:  .LBB55_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB55_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_endpgm
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
   ret void
@@ -1608,6 +2336,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
 ; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_mov_b32_e32 v2, v0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:  .LBB56_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[0:1], 4.0, v[4:5]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[4:5]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB56_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
   ret double %ret
@@ -1631,6 +2382,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
 ; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, v1
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:  .LBB57_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB57_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
@@ -1654,6 +2429,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
 ; GFX942-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
+; GFX1250:       ; %bb.0: ; %main_body
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT:    v_mov_b32_e32 v4, v1
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_mov_b32 s0, 0
+; GFX1250-NEXT:  .LBB58_1: ; %atomicrmw.start
+; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT:    ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_cbranch_execnz .LBB58_1
+; GFX1250-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
 main_body:
   %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
   ret double %ret
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
index 98436c9d6aa9c..f1641fc693b1c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
@@ -24,3 +24,81 @@ ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513
 ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535
 // GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535 ; encoding: [0xff,0xff,0xd4,0xd9,0x02,0x04,0x00,0xfe]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+ds_add_f64 v1, v[2:3] offset:65535
+// GFX1250: ds_add_f64 v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v255, v[2:3] offset:65535
+// GFX1250: ds_add_f64 v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v255, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[254:255] offset:65535
+// GFX1250: ds_add_f64 v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[254:255] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3]
+// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3]
+// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3] offset:4
+// GFX1250: ds_add_f64 v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:4
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4
+// GFX12-ERR-NEXT:{{^}}^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
index 1d14bd91a7569..7a4da255b5594 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
@@ -18,3 +18,303 @@ buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
 // GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
 // GFX12-ERR-NEXT:{{^}}                                                            ^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index c5288a76e5721..1ef40832e0460 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -285,6 +285,217 @@ flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset
 // GFX1250: flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
+flat_atomic_add_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_add_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_min_num_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_min_num_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_max_num_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_max_num_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_min_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_max_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+
 global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
 // GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
index e03c4327d9814..0870aa7ba3dc2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
@@ -17,3 +17,36 @@
 
 # GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513 ; encoding: [0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02]
 0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02
+
+# GFX1250: ds_add_f64 v1, v[254:255] offset:65535  ; encoding: [0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00]
+0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3]                   ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3] offset:4          ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3] offset:65535      ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00]
+0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v255, v[2:3] offset:65535    ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00]
+0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00
+
+# GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe]
+0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3]       ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
index a2f12115bb64b..2499225626acc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
@@ -8,3 +8,93 @@
 
 # GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
 0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
index 291192b53e320..c8eee9b36eb94 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -3177,6 +3177,75 @@
 # GFX1250: global_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
 0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00
 
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
 # GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
 0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00