[llvm] 64c511f - [AMDGPU] Add additional aliases for load transpose instructions (#163900)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 17 10:01:05 PDT 2025


Author: Changpeng Fang
Date: 2025-10-17T10:01:02-07:00
New Revision: 64c511f6a88f607798db29a1af59aab4dd13ff20

URL: https://github.com/llvm/llvm-project/commit/64c511f6a88f607798db29a1af59aab4dd13ff20
DIFF: https://github.com/llvm/llvm-project/commit/64c511f6a88f607798db29a1af59aab4dd13ff20.diff

LOG: [AMDGPU] Add additional aliases for load transpose instructions (#163900)

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/DSInstructions.td
    llvm/lib/Target/AMDGPU/FLATInstructions.td
    llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s
    llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index d0ad120e7ca65..b841171c285d8 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1488,6 +1488,12 @@ let AssemblerPredicate = isGFX12Plus in {
 def : MnemonicAlias<"ds_load_tr_b64", "ds_load_tr8_b64">, Requires<[isGFX1250Plus]>;
 def : MnemonicAlias<"ds_load_tr_b128", "ds_load_tr16_b128">, Requires<[isGFX1250Plus]>;
 
+// Additional aliases for ds load transpose instructions.
+def : MnemonicAlias<"ds_load_b64_tr_b8", "ds_load_tr8_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b128_tr_b16", "ds_load_tr16_b128">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b64_tr_b4", "ds_load_tr4_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"ds_load_b96_tr_b6", "ds_load_tr6_b96">, Requires<[isGFX125xOnly]>;
+
 //===----------------------------------------------------------------------===//
 // GFX11.
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 6de59be7665b4..8ea64d17417f7 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -3711,6 +3711,12 @@ defm GLOBAL_LOAD_TR_B64_w32           : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
 defm GLOBAL_LOAD_TR4_B64              : VFLAT_Real_AllAddr_gfx1250<0x073>;
 defm GLOBAL_LOAD_TR6_B96              : VFLAT_Real_AllAddr_gfx1250<0x074>;
 
+// Additional aliases for global load transpose instructions.
+def : MnemonicAlias<"global_load_b128_tr_b16", "global_load_tr16_b128">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b64_tr_b8", "global_load_tr8_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b64_tr_b4", "global_load_tr4_b64">, Requires<[isGFX125xOnly]>;
+def : MnemonicAlias<"global_load_b96_tr_b6", "global_load_tr6_b96">, Requires<[isGFX125xOnly]>;
+
 defm FLAT_ATOMIC_ADD_F64              : VFLAT_Real_Atomics_gfx1250<0x055>;
 defm FLAT_ATOMIC_MIN_F64              : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
 defm FLAT_ATOMIC_MAX_F64              : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s
index 5b6bb477bdc91..83313a29657f2 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vds_alias.s
@@ -5,3 +5,15 @@ ds_load_tr_b64 v[2:3], v0
 
 ds_load_tr_b128 v[2:5], v0
 // GFX1250: ds_load_tr16_b128 v[2:5], v0            ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02]
+
+ds_load_b128_tr_b16 v[2:5], v0
+// GFX1250: ds_load_tr16_b128 v[2:5], v0            ; encoding: [0x00,0x00,0xf0,0xdb,0x00,0x00,0x00,0x02]
+
+ds_load_b64_tr_b8 v[2:3], v0
+// GFX1250: ds_load_tr8_b64 v[2:3], v0              ; encoding: [0x00,0x00,0xf4,0xdb,0x00,0x00,0x00,0x02]
+
+ds_load_b64_tr_b4 v[2:3], v0
+// GFX1250: ds_load_tr4_b64 v[2:3], v0              ; encoding: [0x00,0x00,0xe8,0xdb,0x00,0x00,0x00,0x02]
+
+ds_load_b96_tr_b6 v[2:4], v0
+// GFX1250: ds_load_tr6_b96 v[2:4], v0              ; encoding: [0x00,0x00,0xec,0xdb,0x00,0x00,0x00,0x02]

diff  --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s
index 6b2dd67b073e3..f983bc0b3dfca 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_alias.s
@@ -35,3 +35,78 @@ global_load_tr_b128 v[2:5], v[6:7], off offset:64
 
 global_load_tr_b128 v[2:5], v[6:7], off offset:-64
 // GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]
+
+global_load_b64_tr_b8 v[2:3], v0, s[0:1]
+// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1]  ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:64
+// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_b64_tr_b8 v[2:3], v0, s[0:1] offset:-64
+// GFX1250: global_load_tr8_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_b64_tr_b8 v[2:3], v[4:5], off
+// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00]
+
+global_load_b64_tr_b8 v[2:3], v[4:5], off offset:64
+// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00]
+
+global_load_b64_tr_b8 v[2:3], v[4:5], off offset:-64
+// GFX1250: global_load_tr8_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0x00,0x16,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff]
+
+global_load_b128_tr_b16 v[2:5], v0, s[0:1]
+// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:64
+// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_b128_tr_b16 v[2:5], v0, s[0:1] offset:-64
+// GFX1250: global_load_tr16_b128 v[2:5], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_b128_tr_b16 v[2:5], v[6:7], off
+// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00]
+
+global_load_b128_tr_b16 v[2:5], v[6:7], off offset:64
+// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00]
+
+global_load_b128_tr_b16 v[2:5], v[6:7], off offset:-64
+// GFX1250: global_load_tr16_b128 v[2:5], v[6:7], off offset:-64 ; encoding: [0x7c,0xc0,0x15,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]
+
+global_load_b64_tr_b4 v[2:3], v0, s[0:1]
+// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1]  ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:64
+// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_b64_tr_b4 v[2:3], v0, s[0:1] offset:-64
+// GFX1250: global_load_tr4_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_b64_tr_b4 v[2:3], v[4:5], off
+// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00]
+
+global_load_b64_tr_b4 v[2:3], v[4:5], off offset:64
+// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0x40,0x00,0x00]
+
+global_load_b64_tr_b4 v[2:3], v[4:5], off offset:-64
+// GFX1250: global_load_tr4_b64 v[2:3], v[4:5], off offset:-64 ; encoding: [0x7c,0xc0,0x1c,0xee,0x02,0x00,0x00,0x00,0x04,0xc0,0xff,0xff]
+
+global_load_b96_tr_b6 v[2:4], v0, s[0:1]
+// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1]  ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+global_load_b96_tr_b6 v[3:5], v0, s[0:1]
+// GFX1250: global_load_tr6_b96 v[3:5], v0, s[0:1]  ; encoding: [0x00,0x00,0x1d,0xee,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:64
+// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_b96_tr_b6 v[2:4], v0, s[0:1] offset:-64
+// GFX1250: global_load_tr6_b96 v[2:4], v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_b96_tr_b6 v[2:4], v[6:7], off
+// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00]
+
+global_load_b96_tr_b6 v[2:4], v[6:7], off offset:64
+// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0x40,0x00,0x00]
+
+global_load_b96_tr_b6 v[2:4], v[6:7], off offset:-64
+// GFX1250: global_load_tr6_b96 v[2:4], v[6:7], off offset:-64 ; encoding: [0x7c,0x00,0x1d,0xee,0x02,0x00,0x00,0x00,0x06,0xc0,0xff,0xff]


        


More information about the llvm-commits mailing list