[llvm-branch-commits] [llvm] 409a2f0 - [AMDGPU] Allow no saddr for global addtid insts

Sebastian Neubauer via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 16 01:06:02 PST 2020


Author: Sebastian Neubauer
Date: 2020-12-16T10:01:40+01:00
New Revision: 409a2f0f9e4847cd25560bfbddf22ffa11d15237

URL: https://github.com/llvm/llvm-project/commit/409a2f0f9e4847cd25560bfbddf22ffa11d15237
DIFF: https://github.com/llvm/llvm-project/commit/409a2f0f9e4847cd25560bfbddf22ffa11d15237.diff

LOG: [AMDGPU] Allow no saddr for global addtid insts

I think the global_load_dword_addtid and global_store_dword_addtid
instructions support switching off the scalar address, i.e. writing
`off` in place of the saddr operand.
Add assembler and disassembler support for this.

Differential Revision: https://reviews.llvm.org/D93288

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/test/MC/AMDGPU/gfx1030_new.s
    llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index d47a79414294..57a355a55a02 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -192,24 +192,34 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
 }
 
 class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
-  bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
+  bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
   (outs regClass:$vdst),
-  !con((ins SReg_64:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
+  !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
+    (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
     !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
-  " $vdst, $saddr$offset$glc$slc$dlc"> {
+  " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
   let is_flat_global = 1;
   let has_data = 0;
   let mayLoad = 1;
   let has_vaddr = 0;
   let has_saddr = 1;
-  let enabled_saddr = 1;
+  let enabled_saddr = EnableSaddr;
   let maybeAtomic = 1;
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
 
   let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
   let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
 }
 
+multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
+  bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
+  def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
+    GlobalSaddrTable<0, opName>;
+  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
+    GlobalSaddrTable<1, opName>;
+}
+
 multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
   let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
     def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
@@ -220,21 +230,29 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
 }
 
 class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
-  bit HasSignedOffset = 0> : FLAT_Pseudo<
+  bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
   (outs),
-  !con(
-    (ins vdataClass:$vdata, SReg_64:$saddr),
-      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
-  " $vdata, $saddr$offset$glc$slc$dlc"> {
+  !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
+    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
   let is_flat_global = 1;
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
   let has_vaddr = 0;
   let has_saddr = 1;
-  let enabled_saddr = 1;
+  let enabled_saddr = EnableSaddr;
   let maybeAtomic = 1;
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
+  bit HasSignedOffset = 0> {
+  def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
+    GlobalSaddrTable<0, opName>;
+  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
+    GlobalSaddrTable<1, opName>;
 }
 
 class FlatScratchInst <string sv_op, string mode> {
@@ -603,7 +621,7 @@ defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_
 defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
 defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
 let OtherPredicates = [HasGFX10_BEncoding] in
-def  GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
+defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
 
 defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
 defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
@@ -612,7 +630,7 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VR
 defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
 defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
 let OtherPredicates = [HasGFX10_BEncoding] in
-def  GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
+defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
 
 defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
 defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
@@ -1651,8 +1669,8 @@ defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
 defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
 defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
 defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
-defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_Base_gfx10<0x016>;
-defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_Base_gfx10<0x017>;
+defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_AllAddr_gfx10<0x016>;
+defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x017>;
 
 // ENC_FLAT_SCRATCH.
 defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;

diff  --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s
index 94f4ff3a237a..a1fe1a040086 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_new.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_new.s
@@ -12,6 +12,15 @@ global_load_dword_addtid v1, s[2:3] offset:16 glc slc dlc
 global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
 // GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00]
 
+global_load_dword_addtid v1, off offset:16
+// GFX10: encoding: [0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01]
+
+global_load_dword_addtid v1, off offset:16 glc slc dlc
+// GFX10: encoding: [0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01]
+
+global_store_dword_addtid v1, off offset:16 glc slc dlc
+// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00]
+
 global_store_dword v254, v1, s[2:3] offset:16
 // GFX10: encoding: [0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00]
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
index 0d7a3f032eac..1445b9a46fb0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
@@ -12,6 +12,15 @@
 # GFX10: global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
 0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00
 
+# GFX10: global_load_dword_addtid v1, off offset:16
+0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01
+
+# GFX10: global_load_dword_addtid v1, off offset:16 glc slc dlc
+0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01
+
+# GFX10: global_store_dword_addtid v1, off offset:16 glc slc dlc
+0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00
+
 # GFX10: global_store_dword v254, v1, s[2:3] offset:16
 0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00
 


        


More information about the llvm-branch-commits mailing list