[llvm] 91f4650 - [AMDGPU][MC][GFX10] Corrected global_atomic_fcmpswap*

Mon Nov 15 01:48:50 PST 2021

Author: Dmitry Preobrazhensky
Date: 2021-11-15T12:51:12+03:00
New Revision: 91f4650ebb057de7f6ed6d038985b2d53d144e15

URL: https://github.com/llvm/llvm-project/commit/91f4650ebb057de7f6ed6d038985b2d53d144e15
DIFF: https://github.com/llvm/llvm-project/commit/91f4650ebb057de7f6ed6d038985b2d53d144e15.diff

LOG: [AMDGPU][MC][GFX10] Corrected global_atomic_fcmpswap*

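Corrected the source data operand size of the global_atomic_fcmpswap and global_atomic_fcmpswap_x2 opcodes: the data operand is now twice as wide as the result, i.e. VReg_64 (v2f32) for global_atomic_fcmpswap and VReg_128 (v2f64) for global_atomic_fcmpswap_x2.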

Differential Revision: https://reviews.llvm.org/D113746

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
    llvm/test/MC/AMDGPU/gfx10_asm_flat.s
    llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index e132edb7c7254..bb0aa648ff90a 100644

--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -779,13 +779,13 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
 
 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
   defm GLOBAL_ATOMIC_FCMPSWAP :
-    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
   defm GLOBAL_ATOMIC_FMIN :
     FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>;
   defm GLOBAL_ATOMIC_FMAX :
     FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>;
   defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
-    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
   defm GLOBAL_ATOMIC_FMIN_X2 :
     FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
   defm GLOBAL_ATOMIC_FMAX_X2 :

diff  --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
index c4de9f3623657..d5f93156b23ab 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
@@ -152,7 +152,7 @@ body:            |
 name:            global_atomic_fcmpswap_to_s_denorm_mode
 body:            |
   bb.0:
-    GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 
@@ -164,7 +164,7 @@ body:            |
 name:            global_atomic_fcmpswap_x2_to_s_denorm_mode
 body:            |
   bb.0:
-    GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 
@@ -224,7 +224,7 @@ body:            |
 name:            global_atomic_fcmpswap_rtn_to_s_denorm_mode
 body:            |
   bb.0:
-    %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 
@@ -236,7 +236,7 @@ body:            |
 name:            global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
 body:            |
   bb.0:
-    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 
@@ -296,7 +296,7 @@ body:            |
 name:            global_atomic_fcmpswap_saddr_to_s_denorm_mode
 body:            |
   bb.0:
-    GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 
@@ -308,7 +308,7 @@ body:            |
 name:            global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
 body:            |
   bb.0:
-    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
+    %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_128, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`)
     S_DENORM_MODE 0, implicit-def $mode, implicit $mode
 ...
 

diff  --git a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s
index 388327205adb5..a3c666bad533a 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s
@@ -225,6 +225,46 @@ flat_atomic_xor v[1:2], v2 dlc
 flat_atomic_xor_x2 v[1:2], v[2:3] dlc
 // GFX10: [0x00,0x10,0x6c,0xdd,0x01,0x02,0x7d,0x00]
 
+//===----------------------------------------------------------------------===//
+// ENC_FLAT_GLOBAL: fcmpswap.
+//===----------------------------------------------------------------------===//
+
+global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1
+// GFX10: [0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap v5, v[1:2], v[2:3], off offset:-1 glc
+// GFX10: [0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05]
+
+global_atomic_fcmpswap v[1:2], v[2:3], off offset:2047
+// GFX10: [0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap v[1:2], v[2:3], off offset:-2048
+// GFX10: [0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 slc
+// GFX10: [0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 dlc
+// GFX10: [0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1
+// GFX10: [0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap_x2 v[5:6], v[1:2], v[2:5], off offset:-1 glc
+// GFX10: [0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05]
+
+global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:2047
+// GFX10: [0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-2048
+// GFX10: [0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 slc
+// GFX10: [0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00]
+
+global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 dlc
+// GFX10: [0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00]
+
 //===----------------------------------------------------------------------===//
 // ENC_FLAT_GLOBAL: dlc support for atomics.
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt
index 779e39c69c329..60aef3ce146b0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt
@@ -267,3 +267,43 @@
 
 # CHECK: global_atomic_xor_x2 v[1:2], v[2:3], off dlc ; encoding: [0x00,0x90,0x6c,0xdd,0x01,0x02,0x7d,0x00]
 0x00,0x90,0x6c,0xdd,0x01,0x02,0x7d,0x00
+
+#===------------------------------------------------------------------------===#
+# FLAT GLOBAL opcodes: fcmpswap
+#===------------------------------------------------------------------------===#
+
+# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap v5, v[1:2], v[2:3], off offset:-1 glc ; encoding: [0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05]
+0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05
+
+# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:2047 ; encoding: [0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-2048 ; encoding: [0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 slc ; encoding: [0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00]
+0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 dlc ; encoding: [0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00]
+0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00]
+0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap_x2 v[5:6], v[1:2], v[2:5], off offset:-1 glc ; encoding: [0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05]
+0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05
+
+# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:2047 ; encoding: [0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00]
+0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-2048 ; encoding: [0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00]
+0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 slc ; encoding: [0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00]
+0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00
+
+# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 dlc ; encoding: [0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00]
+0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00