[llvm] 6181458 - [AMDGPU] gfx940 MUBUF format changes
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 11 11:36:57 PST 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-11T11:36:49-08:00
New Revision: 61814586620deca51ecf6477e19c6afa8e28ad90
URL: https://github.com/llvm/llvm-project/commit/61814586620deca51ecf6477e19c6afa8e28ad90
DIFF: https://github.com/llvm/llvm-project/commit/61814586620deca51ecf6477e19c6afa8e28ad90.diff
LOG: [AMDGPU] gfx940 MUBUF format changes
Differential Revision: https://reviews.llvm.org/D121234
Added:
Modified:
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/MC/AMDGPU/gfx940_asm_features.s
llvm/test/MC/AMDGPU/gfx940_err.s
llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index f968ce21e43b7..0b7aebd4e328f 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1175,8 +1175,13 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
let SubtargetPredicate = isGFX90APlus in {
def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> {
+ let has_glc = 1;
+ let has_sccb = 1;
+ let InOperandList = (ins CPol_0:$cpol);
+ let AsmOperands = "$cpol";
}
def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
+ let SubtargetPredicate = isGFX90AOnly;
}
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
@@ -1184,6 +1189,14 @@ let SubtargetPredicate = isGFX90APlus in {
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX90APlus
+def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
+ let SubtargetPredicate = isGFX940Plus;
+ let has_glc = 1;
+ let has_sccb = 1;
+ let InOperandList = (ins CPol_0:$cpol);
+ let AsmOperands = "$cpol";
+}
+
let SubtargetPredicate = isGFX10Plus in {
def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
@@ -2366,9 +2379,28 @@ class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
let Inst{55} = acc;
}
+class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus;
+ let DecoderNamespace = "GFX9";
+ let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
+
+ let Inst{55} = acc;
+}
+
multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
def _vi : MUBUF_Real_vi<op, ps>;
- def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
+
+ foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
+
+ foreach _ = BoolToList<ps.FPAtomic>.ret in {
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> {
+ let SubtargetPredicate = isGFX90AOnly;
+ let AssemblerPredicate = isGFX90AOnly;
+ }
+ def _gfx940 : MUBUF_Real_gfx940<op, ps>;
+ }
}
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2558,9 +2590,17 @@ let SubtargetPredicate = isGFX90APlus in {
} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
+ let AsmString = BUFFER_WBL2.Mnemonic; // drop flags
+ let AssemblerPredicate = isGFX90AOnly;
+ let SubtargetPredicate = isGFX90AOnly;
}
def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;
+let SubtargetPredicate = isGFX940Plus in {
+def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
+def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
+}
+
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
MTBUF_Real<ps>,
Enc64,
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index d1ce9680c0328..8c8609c3c7100 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1360,7 +1360,9 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// to initiate writeback of any dirty cache lines of earlier writes by the
// same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
// writeback has completed.
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+ // Set SC bits to indicate system scope.
+ .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
// Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
// vmcnt(0)" needed by the "BUFFER_WBL2".
Changed = true;
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
index d3f290725a120..1c7c502af9f2b 100644
--- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -149,6 +149,33 @@ v_mov_b64 v[2:3], 1
// GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
v_mov_b64 v[2:3], 0x64
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+buffer_wbl2 sc0 sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc1
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+buffer_inv sc0 sc1
+
// NOT-GFX940: error: invalid operand for instruction
// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
buffer_atomic_swap v5, off, s[8:11], s3 sc0
@@ -224,3 +251,28 @@ global_atomic_min_f64 v[0:1], v[2:3], off sc1
// GFX10: error: instruction not supported on this GPU
// GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
global_atomic_max_f64 v[0:1], v[2:3], off sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1
+
+// GFX90A: error: invalid operand for instruction
+// GFX10: error: instruction not supported on this GPU
+// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1
diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s
index b5ca08d381aad..832754d4a7600 100644
--- a/llvm/test/MC/AMDGPU/gfx940_err.s
+++ b/llvm/test/MC/AMDGPU/gfx940_err.s
@@ -31,6 +31,9 @@ v_mov_b64 v[2:3], v[4:5] dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWOR
v_mov_b64_sdwa v[2:3], v[4:5]
// GFX940: error: sdwa variant of this instruction is not supported
+buffer_invl2
+// GFX940: error: instruction not supported on this GPU
+
global_load_dword v2, v[2:3], off glc
// GFX940: error: invalid operand for instruction
@@ -48,3 +51,9 @@ buffer_atomic_swap v5, off, s[8:11], s3 glc
buffer_atomic_swap v5, off, s[8:11], s3 slc
// GFX940: error: invalid operand for instruction
+
+buffer_wbl2 glc
+// GFX940: error: invalid operand for instruction
+
+buffer_wbl2 scc
+// GFX940: error: invalid operand for instruction
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
index c187208e6a819..cf8844889f19a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -102,6 +102,24 @@
# GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00]
0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00
+# GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00]
+0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00
+
# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03
@@ -149,3 +167,18 @@
# GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00]
0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00
+
+# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03
+
+# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03]
+0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03
More information about the llvm-commits mailing list