[llvm] [AMDGPU] Support f64 atomics on gfx1250 (PR #151172)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 08:30:24 PDT 2025
https://github.com/changpeng created https://github.com/llvm/llvm-project/pull/151172
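- BUFFER_ATOMIC / FLAT_ATOMIC / GLOBAL_ATOMIC ADD, MIN and MAX for F64 (on gfx1250 MIN/MAX use the *_min_num_f64 / *_max_num_f64 mnemonics)
- DS_ADD_F64 and DS_ADD_RTN_F64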
>From ba51ef50de89d8fe8aa16289314f0ad1e4c08a2d Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Tue, 29 Jul 2025 00:16:00 -0700
Subject: [PATCH] [AMDGPU] Support f64 atomics on gfx1250
- BUF/FLAT/GLOBAL_ADD/MIN/MAX_F64
- DS_ADD_F64
Co-authored-by: Konstantin Zhuravlyov <Konstantin Zhuravlyov at amd.com>
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 2 +-
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/BUFInstructions.td | 12 +-
llvm/lib/Target/AMDGPU/DSInstructions.td | 3 +
llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +-
.../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 790 +++++++++++++++++
.../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 799 ++++++++++++++++++
llvm/test/MC/AMDGPU/gfx1250_asm_ds.s | 78 ++
.../MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s | 300 +++++++
llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s | 211 +++++
.../Disassembler/AMDGPU/gfx1250_dasm_ds.txt | 33 +
.../AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt | 90 ++
.../AMDGPU/gfx1250_dasm_vflat.txt | 69 ++
14 files changed, 2397 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a17fb934ff66d..25e1eabb2c293 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2429,7 +2429,7 @@ def HasAtomicFMinFMaxF64FlatInsts :
def HasLdsAtomicAddF64 :
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
- AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
+ AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX1250Insts)>;
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fedfa3f9dd900..f16351fac9e2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1682,7 +1682,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasFlatAtomicFaddF32Inst())
Atomic.legalFor({{S32, FlatPtr}});
- if (ST.hasGFX90AInsts()) {
+ if (ST.hasGFX90AInsts() || ST.hasGFX1250Insts()) {
// These are legal with some caveats, and should have undergone expansion in
// the IR in most situations
// TODO: Move atomic expansion into legalizer
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index f99e71637f70f..1956a15c57d67 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -2489,7 +2489,7 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
}
//===----------------------------------------------------------------------===//
-// MUBUF - GFX11, GFX12.
+// MUBUF - GFX11, GFX12, GFX1250.
//===----------------------------------------------------------------------===//
// gfx11 instruction that accept both old and new assembler name.
@@ -2600,6 +2600,12 @@ multiclass MUBUF_Real_Atomic_gfx11_gfx12<bits<8> op,
def : Mnem_gfx12<gfx11_name, gfx12_name>;
}
+multiclass MUBUF_Real_Atomic_gfx12_Renamed<bits<8> op, string real_name> :
+ MUBUF_Real_Atomic_gfx12_impl<op, 0, real_name>,
+ MUBUF_Real_Atomic_gfx12_impl<op, 1, real_name> {
+ def : Mnem_gfx12<get_BUF_ps<NAME>.Mnemonic, real_name>;
+}
+
defm BUFFER_GL0_INV : MUBUF_Real_gfx11<0x02B>;
defm BUFFER_GL1_INV : MUBUF_Real_gfx11<0x02C>;
@@ -2678,6 +2684,10 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_gfx11_gfx12<0x04B, "buffer
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_gfx12<0x059>;
defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_gfx12<0x05a>;
+defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_gfx12<0x055>;
+defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05b, "buffer_atomic_min_num_f64">;
+defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05c, "buffer_atomic_max_num_f64">;
+
//===----------------------------------------------------------------------===//
// MUBUF - GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 319cc9d1da181..3ff675d6e5e97 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1397,6 +1397,9 @@ defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0,
defm DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_Real_gfx12<0x0e1>;
defm DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_Real_gfx12<0x0e2>;
+defm DS_ADD_F64 : DS_Real_gfx12<0x054>;
+defm DS_ADD_RTN_F64 : DS_Real_gfx12<0x074>;
+
let AssemblerPredicate = HasLdsBarrierArriveAtomic in {
defm DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 : DS_Real_gfx12<0x056>;
defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_Real_gfx12<0x075>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8ede9caead8bc..d5d1074622135 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -3488,6 +3488,14 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>;
defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>;
+defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
+defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
+defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;
+
+defm GLOBAL_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
+defm GLOBAL_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "global_atomic_min_num_f64">;
+defm GLOBAL_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "global_atomic_max_num_f64">;
+
def True16D16Table : GenericTable {
let FilterClass = "True16D16Table";
let CppTypeName = "True16D16Info";
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 10ded0e1d1c3a..5357a375ae5a9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -715,7 +715,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
// DS_ADD_F64/DS_ADD_RTN_F64
- bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
+ bool hasLdsAtomicAddF64() const {
+ return hasGFX90AInsts() || hasGFX1250Insts();
+ }
bool hasMultiDwordFlatScratchAddressing() const {
return getGeneration() >= GFX9;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 0e132f130c844..2785b78da99e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefix=GFX90A
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX1250
declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -37,6 +38,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -56,6 +68,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -92,6 +111,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -120,6 +157,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -139,6 +187,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -175,6 +230,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -203,6 +276,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -222,6 +306,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -258,6 +349,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -286,6 +394,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -305,6 +424,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -341,6 +467,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -369,6 +512,17 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -388,6 +542,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -424,6 +585,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -452,6 +631,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -471,6 +661,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -507,6 +704,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -535,6 +750,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -554,6 +780,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -590,6 +823,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -618,6 +868,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -637,6 +898,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -673,6 +941,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -701,6 +986,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -720,6 +1016,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -756,6 +1059,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -784,6 +1105,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -803,6 +1135,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -839,6 +1178,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -867,6 +1224,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -886,6 +1254,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -922,6 +1297,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -950,6 +1342,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -969,6 +1372,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -1005,6 +1415,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -1056,6 +1483,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: .LBB36_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s0, exec_lo
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB36_2
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: .LBB36_2:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1104,6 +1555,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: .LBB37_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s0, exec_lo
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB37_2
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: .LBB37_2:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1154,6 +1627,30 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: .LBB38_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s0, exec_lo
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB38_2
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: .LBB38_2:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1202,6 +1699,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: .LBB39_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s0, exec_lo
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB39_2
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: .LBB39_2:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1229,6 +1748,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1254,6 +1786,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1281,6 +1825,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1329,6 +1886,28 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: .LBB43_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s0, exec_lo
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB43_2
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: .LBB43_2:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1360,6 +1939,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1389,6 +1981,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1420,6 +2023,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1447,6 +2063,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1472,6 +2101,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1501,6 +2142,19 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1530,6 +2184,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1575,6 +2240,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: .LBB51_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT: s_mov_b32 s2, exec_lo
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB51_3
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v4, s1
+; GFX1250-NEXT: ds_load_b64 v[2:3], v4
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: .LBB51_2: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB51_2
+; GFX1250-NEXT: .LBB51_3:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1620,6 +2319,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: .LBB52_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT: s_mov_b32 s2, exec_lo
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB52_3
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v4, s1
+; GFX1250-NEXT: ds_load_b64 v[2:3], v4
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: .LBB52_2: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB52_2
+; GFX1250-NEXT: .LBB52_3:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1665,6 +2398,40 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: .LBB53_2:
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_mov_b32 s1, exec_lo
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: v_mbcnt_lo_u32_b32 v0, s1, 0
+; GFX1250-NEXT: s_mov_b32 s2, exec_lo
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX1250-NEXT: s_cbranch_execz .LBB53_3
+; GFX1250-NEXT: ; %bb.1:
+; GFX1250-NEXT: s_bcnt1_i32_b32 s1, s1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_cvt_f64_u32_e32 v[0:1], s1
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v4, s1
+; GFX1250-NEXT: ds_load_b64 v[2:3], v4
+; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: .LBB53_2: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[6:7], v[2:3], v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[6:7], v4, v[6:7], v[2:3]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB53_2
+; GFX1250-NEXT: .LBB53_3:
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1687,6 +2454,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, v0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: .LBB54_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_add_f64_e32 v[0:1], 4.0, v[4:5]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[4:5]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB54_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret double %ret
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 6067194d947fa..f9a24fee59692 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX942
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX1250
declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
@@ -38,6 +39,17 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -57,6 +69,13 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -93,6 +112,24 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -121,6 +158,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -140,6 +188,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -176,6 +231,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -204,6 +277,17 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -223,6 +307,13 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -259,6 +350,23 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -287,6 +395,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -306,6 +425,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -342,6 +468,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -370,6 +513,17 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -389,6 +543,13 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -425,6 +586,24 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -453,6 +632,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -472,6 +662,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -508,6 +705,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -536,6 +751,17 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -555,6 +781,13 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -591,6 +824,23 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -619,6 +869,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -638,6 +899,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -674,6 +942,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_min_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -702,6 +987,17 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -721,6 +1017,13 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -757,6 +1060,24 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -785,6 +1106,17 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8)
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
ret void
@@ -804,6 +1136,13 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
store double %ret, ptr poison
@@ -840,6 +1179,24 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_mov_b32 s6, 4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], s6 offen th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -868,6 +1225,17 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -887,6 +1255,13 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -923,6 +1298,23 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -951,6 +1343,17 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(
; GFX942-NEXT: v_mov_b32_e32 v2, s8
; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
ret void
@@ -970,6 +1373,13 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: flat_store_b64 v[0:1], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
store double %ret, ptr poison
@@ -1006,6 +1416,23 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[8:9]
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v2, s10
+; GFX1250-NEXT: buffer_atomic_max_num_f64 v[0:1], v2, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT_RETURN
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x44
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
store double %ret, ptr addrspace(1) %out, align 8
@@ -1038,6 +1465,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1067,6 +1507,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1098,6 +1549,19 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1127,6 +1591,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1154,6 +1629,19 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1179,6 +1667,18 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1206,6 +1706,19 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1246,6 +1759,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
ret void
@@ -1277,6 +1801,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1306,6 +1843,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1337,6 +1885,19 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1364,6 +1925,19 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1389,6 +1963,18 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1418,6 +2004,19 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
; GFX942-NEXT: buffer_inv sc0 sc1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1458,6 +2057,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1
ret void
@@ -1485,6 +2095,31 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do
; GFX942-NEXT: ds_add_f64 v2, v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x24
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x2c
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s2
+; GFX1250-NEXT: s_mov_b32 s2, 0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: .LBB51_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[4:5], s[0:1], v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2
+; GFX1250-NEXT: s_cbranch_execnz .LBB51_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret void
@@ -1508,6 +2143,30 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT: v_mov_b32_e32 v4, v1
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: .LBB52_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB52_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
@@ -1534,6 +2193,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX942-NEXT: ds_add_f64 v2, v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: .LBB53_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB53_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1560,6 +2242,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX942-NEXT: ds_add_f64 v2, v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: .LBB54_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB54_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret void
@@ -1586,6 +2291,29 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX942-NEXT: ds_add_f64 v2, v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_endpgm
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v2, s0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: .LBB55_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_add_f64_e32 v[4:5], 4.0, v[0:1]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[4:5], v2, v[4:5], v[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[0:1]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB55_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
@@ -1608,6 +2336,29 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_pat:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, v0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: .LBB56_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_add_f64_e32 v[0:1], 4.0, v[4:5]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[4:5]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB56_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -1631,6 +2382,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT: v_mov_b32_e32 v4, v1
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: .LBB57_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB57_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
@@ -1654,6 +2429,30 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
+; GFX1250: ; %bb.0: ; %main_body
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v2, v0
+; GFX1250-NEXT: v_mov_b32_e32 v4, v1
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_mov_b32 s0, 0
+; GFX1250-NEXT: .LBB58_1: ; %atomicrmw.start
+; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_add_f64_e32 v[0:1], v[6:7], v[4:5]
+; GFX1250-NEXT: ds_cmpstore_rtn_b64 v[0:1], v2, v[0:1], v[6:7]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_cbranch_execnz .LBB58_1
+; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
+; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
index 98436c9d6aa9c..f1641fc693b1c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_ds.s
@@ -24,3 +24,81 @@ ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513
ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535
// GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[254:255], v2, v[4:5] offset:65535 ; encoding: [0xff,0xff,0xd4,0xd9,0x02,0x04,0x00,0xfe]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+ds_add_f64 v1, v[2:3] offset:65535
+// GFX1250: ds_add_f64 v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v255, v[2:3] offset:65535
+// GFX1250: ds_add_f64 v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v255, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[254:255] offset:65535
+// GFX1250: ds_add_f64 v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[254:255] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3]
+// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3]
+// GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_f64 v1, v[2:3] offset:4
+// GFX1250: ds_add_f64 v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_f64 v1, v[2:3] offset:4
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4
+// GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4
+// GFX12-ERR-NEXT:{{^}}^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
index 1d14bd91a7569..7a4da255b5594 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vbuffer_mubuf.s
@@ -18,3 +18,303 @@ buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
// GFX12-ERR-NEXT:{{^}} ^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT:{{^}}buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[12:15], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[96:99], s3 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s101 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], m0 offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], v0, s[8:11], s3 offen offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3
+// GFX12-ERR-NEXT:{{^}}^
+
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 offset:7
+// GFX12-ERR-NEXT:{{^}}^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index c5288a76e5721..1ef40832e0460 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -285,6 +285,217 @@ flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset
// GFX1250: flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+flat_atomic_add_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_add_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_min_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[254:255], v[2:3] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[254:255], v[2:3] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[254:255] offset:4095
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[254:255] offset:4095
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
+// GFX12-ERR-NEXT:{{^}}^
+
+flat_atomic_max_f64 v[0:1], v[2:3] offset:7
+// GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] offset:7
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_add_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_min_num_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_min_num_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_max_num_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_max_num_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_min_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+global_atomic_max_f64 v[0:1], v[2:3], off
+// GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off
+// GFX12-ERR-NEXT:{{^}}^
+
+
global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
index e03c4327d9814..0870aa7ba3dc2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_ds.txt
@@ -17,3 +17,36 @@
# GFX1250: ds_atomic_barrier_arrive_rtn_b64 v[2:3], v2, v[4:5] offset:513 ; encoding: [0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02]
0x01,0x02,0xd4,0xd9,0x02,0x04,0x00,0x02
+
+# GFX1250: ds_add_f64 v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00]
+0xff,0xff,0x50,0xd9,0x01,0xfe,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3] ; encoding: [0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+0x00,0x00,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00]
+0x04,0x00,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00]
+0xff,0xff,0x50,0xd9,0x01,0x02,0x00,0x00
+
+# GFX1250: ds_add_f64 v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00]
+0xff,0xff,0x50,0xd9,0xff,0x02,0x00,0x00
+
+# GFX1250: ds_add_rtn_f64 v[254:255], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe]
+0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0xfe
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[254:255] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0x01,0xfe,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] ; encoding: [0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0x00,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:4 ; encoding: [0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0x04,0x00,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0x01,0x02,0x00,0x04
+
+# GFX1250: ds_add_rtn_f64 v[4:5], v255, v[2:3] offset:65535 ; encoding: [0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04]
+0xff,0xff,0xd0,0xd9,0xff,0x02,0x00,0x04
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
index a2f12115bb64b..2499225626acc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vbuffer_mubuf.txt
@@ -8,3 +8,93 @@
# GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0x40,0x15,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_min_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0xc0,0x16,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[12:15], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x18,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[96:99], s3 offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0xc0,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s101 offset:4095 ; encoding: [0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x65,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], m0 offset:4095 ; encoding: [0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00]
+0x7d,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x80,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], v0, s[8:11], s3 offen offset:4095 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x40,0x00,0xff,0x0f,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: buffer_atomic_max_num_f64 v[4:5], off, s[8:11], s3 offset:7 ; encoding: [0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00]
+0x03,0x00,0x17,0xc4,0x04,0x10,0x80,0x00,0x00,0x07,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
index 291192b53e320..c8eee9b36eb94 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -3177,6 +3177,75 @@
# GFX1250: global_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_add_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0x40,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_min_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0xc0,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0xfe,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[254:255] offset:4095 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x7f,0x00,0xff,0x0f,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: flat_atomic_max_num_f64 v[0:1], v[2:3] offset:7 ; encoding: [0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00]
+0x7c,0x00,0x17,0xec,0x00,0x00,0x00,0x01,0x00,0x07,0x00,0x00
+
+# GFX1250: global_atomic_add_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x40,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_min_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
+# GFX1250: global_atomic_max_num_f64 v[0:1], v[2:3], off ; encoding: [0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00]
+0x7c,0x00,0x17,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00
+
# GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00
More information about the llvm-commits
mailing list