[llvm] [AMDGPU] Support tfe operand in image_atomic instructions (PR #92469)

Jun Wang via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 16:06:35 PDT 2024


https://github.com/jwanggit86 created https://github.com/llvm/llvm-project/pull/92469

Currently, if an image_atomic instruction has the 'tfe' operand, the llvm-mc assembler generally rejects it. The only exception is when dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g., image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch fixes this problem and allows tfe to be specified in image_atomic instructions.

>From 9914677223f35766ae038eb4f660ec230b262cb8 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Thu, 16 May 2024 17:51:50 -0500
Subject: [PATCH] [AMDGPU] Support tfe operand in image_atomic instructions

Currently, if an image_atomic instruction has the 'tfe' operand, the
llvm-mc assembler generally rejects it. The only exception is when
dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g.,
image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch
fixes this problem and allows tfe to be specified in image_atomic
instructions.
---
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  4 +++
 llvm/test/MC/AMDGPU/gfx10_asm_mimg.s          | 25 ++++++++++++++
 llvm/test/MC/AMDGPU/gfx11_asm_mimg.s          | 25 ++++++++++++++
 llvm/test/MC/AMDGPU/mimg.s                    | 33 +++++++++++++++++++
 .../AMDGPU/gfx8_mimg_features.txt             |  2 +-
 5 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 351263d079768..ebd7acdeef5a5 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1101,6 +1101,10 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
       defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
       let VDataDwords = !if(isCmpSwap, 4, 2) in
       defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
+      let VDataDwords = !if(isCmpSwap, 2, 2) in
+      defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
+      let VDataDwords = !if(isCmpSwap, 4, 4) in
+      defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
     }
   } // End IsAtomicRet = 1
 }
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s b/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
index 7b137289aa817..96b8959038296 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
@@ -654,3 +654,28 @@ image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dma
 
 image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x40]
+
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x44,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x45,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x44,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x45,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x40,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x41,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x40,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x0a,0x00,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
index 6d467dfa1d8e1..d873932b75cba 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
@@ -5603,3 +5603,28 @@ image_store_pck v1, v[2:3], s[96:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm a1
 
 image_store_pck v255, v[254:255], ttmp[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe
 // GFX11: [0x98,0x74,0x21,0xf0,0xfe,0xff,0x5d,0x00]
+
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x24,0x00]
diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s
index 38927b40f3347..fd8fd5afbc487 100644
--- a/llvm/test/MC/AMDGPU/mimg.s
+++ b/llvm/test/MC/AMDGPU/mimg.s
@@ -439,6 +439,39 @@ image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc
 // SICI:  image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x40,0xf0,0xc0,0x04,0x07,0x00]
 // GFX89: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x44,0xf0,0xc0,0x04,0x07,0x00]
 
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v4, v10, s[8:15] dmask:0x1
+// SICI:  image_atomic_add v4, v10, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x44,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v4, v10, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x48,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe
+// SICI:  image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe ; encoding: [0x00,0x01,0x45,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe ; encoding: [0x00,0x01,0x49,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:5], v10, s[8:15] dmask:0x3
+// SICI:  image_atomic_add v[4:5], v10, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:5], v10, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x48,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe
+// SICI:  image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe ; encoding: [0x00,0x03,0x45,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe ; encoding: [0x00,0x03,0x49,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3
+// SICI:  image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 ; encoding: [0x00,0x03,0x40,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe
+// SICI:  image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe ; encoding: [0x00,0x03,0x41,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe ; encoding: [0x00,0x03,0x45,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf
+// SICI:  image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf ; encoding: [0x00,0x0f,0x40,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf ; encoding: [0x00,0x0f,0x44,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe
+// SICI:  image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe ; encoding: [0x00,0x0f,0x41,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe ; encoding: [0x00,0x0f,0x45,0xf0,0xc0,0x04,0x07,0x00]
+
 // FIXME: This test is incorrect because r128 assumes a 128-bit SRSRC.
 image_atomic_add v10, v6, s[8:15] dmask:0x1 r128
 // SICI: image_atomic_add v10, v6, s[8:15] dmask:0x1 r128 ; encoding: [0x00,0x81,0x44,0xf0,0x06,0x0a,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 292af1850db86..0a5bafc55f4d4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -195,7 +195,7 @@
 # VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00
 
-# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00
 
 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]



More information about the llvm-commits mailing list