[llvm] 6e279f5 - [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 7 04:49:39 PST 2022


Author: Dmitry Preobrazhensky
Date: 2022-11-07T15:49:21+03:00
New Revision: 6e279f5bb663b8edca53c1195edd11e3502677e1

URL: https://github.com/llvm/llvm-project/commit/6e279f5bb663b8edca53c1195edd11e3502677e1
DIFF: https://github.com/llvm/llvm-project/commit/6e279f5bb663b8edca53c1195edd11e3502677e1.diff

LOG: [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16

Differential Revision: https://reviews.llvm.org/D137332

Added: 
    (none)

Modified: 
    llvm/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
    llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
    llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
    llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt

Removed: 
    (none)


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index bb2b918837c6e..fdbdfe5c47f9e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -660,12 +660,9 @@ let SubtargetPredicate = isGFX11Only in
 defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
 
 def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
-  let Src0RC64 = VRegSrc_32;
-  let Src1RC64 = SCSrc_b32;
-  let Src2RC64 = SCSrc_b32;
   let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
-                          IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
-                          IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
+                          IntOpSelMods:$src1_modifiers, SSrc_b32:$src1,
+                          IntOpSelMods:$src2_modifiers, SSrc_b32:$src2,
                           VGPR_32:$vdst_in, op_sel0:$op_sel);
   let HasClamp = 0;
   let HasExtVOP3DPP = 0;

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index 862dfe7154fd3..6b233f9a59e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -27,9 +27,8 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src
 ; GCN-LABEL: {{^}}v_permlane16_b32_vll:
 ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
 ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
 ; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -124,9 +123,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
 ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
 ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
 ; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out

diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
index d369973d56dd4..b05bab15e2008 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
@@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, 0.5, s3
 v_permlane16_b32 v5, v1, -4.0, s3
 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00]
 
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
 v_permlane16_b32 v5, v1, s2, s103
 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01]
 
@@ -12830,6 +12833,12 @@ v_permlane16_b32 v5, v1, s2, 0.5
 v_permlane16_b32 v5, v1, s2, -4.0
 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03]
 
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
 v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0]
 // GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00]
 
@@ -12923,6 +12932,9 @@ v_permlanex16_b32 v5, v1, 0.5, s3
 v_permlanex16_b32 v5, v1, -4.0, s3
 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00]
 
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
 v_permlanex16_b32 v5, v1, s2, s103
 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01]
 
@@ -12956,6 +12968,12 @@ v_permlanex16_b32 v5, v1, s2, 0.5
 v_permlanex16_b32 v5, v1, s2, -4.0
 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03]
 
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
 v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0]
 // GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00]
 

diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 693e12fd01b6d..991ef34807e85 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -5287,6 +5287,15 @@ v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0]
 v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
 // GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01]
 
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
 v_permlanex16_b32 v5, v1, s2, s3
 // GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00]
 
@@ -5323,6 +5332,15 @@ v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0]
 v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
 // GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01]
 
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
 v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
 // GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01]
 

diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index 66ce6b8b94fab..0785ba2ea2eb6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -16044,6 +16044,9 @@
 # GFX10: v_permlane16_b32 v5, v1, s103, s3       ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00]
 0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00
 
+# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
 # GFX10: v_permlane16_b32 v5, v1, s2, -1         ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03]
 0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03
 
@@ -16071,6 +16074,9 @@
 # GFX10: v_permlane16_b32 v5, v1, s2, s3         ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00]
 0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00
 
+# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
 # GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00]
 0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00
 
@@ -16149,6 +16155,9 @@
 # GFX10: v_permlanex16_b32 v5, v1, s103, s3      ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00]
 0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00
 
+# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
 # GFX10: v_permlanex16_b32 v5, v1, s2, -1        ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03]
 0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03
 
@@ -16176,6 +16185,9 @@
 # GFX10: v_permlanex16_b32 v5, v1, s2, s3        ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00]
 0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00
 
+# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
 # GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00]
 0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00
 


        


More information about the llvm-commits mailing list