[llvm] [AMDGPU][MC] Fix disassembler problem for image_atomic with TFE (PR #112622)
Jun Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 14:24:58 PDT 2024
https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/112622
>From 73de6a8d80e468bc89b684ee1bc356ff9e85b470 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Wed, 16 Oct 2024 14:46:50 -0700
Subject: [PATCH 1/3] [AMDGPU][MC] Fix disassembler problem for image_atomic
with TFE
For image_atomic instructions with TFE, in some cases (e.g., when
dmask=3) the disassembler produces dst register with wrong size
(e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead
of v[5:7]).
---
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 9 +-
.../AMDGPU/gfx10_mimg_features.txt | 84 +++++++++++++++
.../AMDGPU/gfx11_dasm_mimg_features.txt | 83 ++++++++++++++
.../AMDGPU/gfx12_dasm_vimage_features.txt | 101 ++++++++++++++++++
.../AMDGPU/gfx8_mimg_features.txt | 85 ++++++++++++++-
.../AMDGPU/gfx9_mimg_features.txt | 83 ++++++++++++++
6 files changed, 439 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5c49a8116ae7fc..f52392ba1bd19c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1123,10 +1123,13 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 2, 2) in
+ let VDataDwords = !if(isCmpSwap, 3, 3) in
defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 4, 4) in
- defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+
+ if isCmpSwap then {
+ let VDataDwords = 5 in
+ defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+ }
}
} // End IsAtomicRet = 1
}
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
new file mode 100644
index 00000000000000..133af6b0c1da4d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
@@ -0,0 +1,84 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX1010
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+
+# GFX1010: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
index 46488a9aa4ec70..fdf376ed6d6932 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
@@ -135,6 +135,89 @@
# GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00]
0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX11: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x50,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x50,0xf0,0x01,0x05,0x22,0x00
+
+
# GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64]
0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
new file mode 100644
index 00000000000000..2e747adbd07205
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
@@ -0,0 +1,101 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX12: image_atomic_add_flt v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x60,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_flt v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xe0,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd3,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x42,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 0a5bafc55f4d43..1bb1014a431d37 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -184,6 +184,85 @@
# VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00
+# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
+
#===------------------------------------------------------------------------===#
# Invalid image atomics (incorrect dmask value).
# Disassembler may produce a partially incorrect instruction but should not fail.
@@ -192,10 +271,10 @@
# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00
# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]
@@ -204,7 +283,7 @@
# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00
#===------------------------------------------------------------------------===#
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
new file mode 100644
index 00000000000000..8300fe4e9db50b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
@@ -0,0 +1,83 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX900
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+# GFX900: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
>From 1301661b2c9adcdaab7a1009627ae74e5d25df42 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Mon, 21 Oct 2024 12:07:04 -0700
Subject: [PATCH 2/3] A minor code change.
---
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index f52392ba1bd19c..8588b601cb0bf2 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1123,7 +1123,7 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 3, 3) in
+ let VDataDwords = 3 in
defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
if isCmpSwap then {
>From 0adf671a1270bb8b1f3d5b30140a76a0909af9bd Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Wed, 23 Oct 2024 14:20:45 -0700
Subject: [PATCH 3/3] Make the _Vx suffix align with reg size. Atomics other
than cmpswap have V1-V3, while cmpswap has V2-V5.
---
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 14 +++--
.../llvm.amdgcn.image.atomic.dim.mir | 60 +++++++++----------
llvm/test/CodeGen/AMDGPU/release-vgprs.mir | 4 +-
3 files changed, 41 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 8588b601cb0bf2..156d1f867cdf6b 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1119,16 +1119,20 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
// _V* variants have different dst size, but the size is encoded implicitly,
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
// Other variants are reconstructed by disassembler using dmask and tfe.
- let VDataDwords = !if(isCmpSwap, 2, 1) in
- defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 4, 2) in
- defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
+ if !not(isCmpSwap) then
+ let VDataDwords = 1 in
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>;
+
+ let VDataDwords = 2 in
+ defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>;
let VDataDwords = 3 in
defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
if isCmpSwap then {
+ let VDataDwords = 4 in
+ defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_128, 0, isFP, renamed>;
let VDataDwords = 5 in
- defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+ defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
}
}
} // End IsAtomicRet = 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
index fb8c76bb6f51d6..292fa4be1ca1d6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -21,8 +21,8 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: atomic_cmpswap_i32_1d
@@ -31,8 +31,8 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10-LABEL: name: atomic_cmpswap_i32_1d
@@ -41,8 +41,8 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX11-LABEL: name: atomic_cmpswap_i32_1d
@@ -51,8 +51,8 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_]].sub0
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0
; GFX11-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX12-LABEL: name: atomic_cmpswap_i32_1d
@@ -61,8 +61,8 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
- ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_]].sub0
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_]].sub0
; GFX12-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@@ -89,7 +89,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -97,7 +97,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -105,7 +105,7 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -113,7 +113,7 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -121,7 +121,7 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -146,8 +146,8 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
- ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX8-LABEL: name: atomic_cmpswap_i64_1d
@@ -156,8 +156,8 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX10-LABEL: name: atomic_cmpswap_i64_1d
@@ -166,8 +166,8 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX11-LABEL: name: atomic_cmpswap_i64_1d
@@ -176,8 +176,8 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
- ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0_sub1
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX12-LABEL: name: atomic_cmpswap_i64_1d
@@ -186,8 +186,8 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
- ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_]].sub0_sub1
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_]].sub0_sub1
; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@@ -214,7 +214,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -222,7 +222,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -230,7 +230,7 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -238,7 +238,7 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -246,7 +246,7 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 25804123eff181..ea07619efc681a 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -547,9 +547,9 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: image_atomic
- ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
+ ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
; CHECK-NEXT: S_ENDPGM 0
- renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
+ renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
S_ENDPGM 0
...
More information about the llvm-commits
mailing list