[llvm] 6e279f5 - [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 7 04:49:39 PST 2022
Author: Dmitry Preobrazhensky
Date: 2022-11-07T15:49:21+03:00
New Revision: 6e279f5bb663b8edca53c1195edd11e3502677e1
URL: https://github.com/llvm/llvm-project/commit/6e279f5bb663b8edca53c1195edd11e3502677e1
DIFF: https://github.com/llvm/llvm-project/commit/6e279f5bb663b8edca53c1195edd11e3502677e1.diff
LOG: [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16
Differential Revision: https://reviews.llvm.org/D137332
Added:
Modified:
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index bb2b918837c6e..fdbdfe5c47f9e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -660,12 +660,9 @@ let SubtargetPredicate = isGFX11Only in
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
- let Src0RC64 = VRegSrc_32;
- let Src1RC64 = SCSrc_b32;
- let Src2RC64 = SCSrc_b32;
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
- IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
- IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
+ IntOpSelMods:$src1_modifiers, SSrc_b32:$src1,
+ IntOpSelMods:$src2_modifiers, SSrc_b32:$src2,
VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasClamp = 0;
let HasExtVOP3DPP = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index 862dfe7154fd3..6b233f9a59e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -27,9 +27,8 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src
; GCN-LABEL: {{^}}v_permlane16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
%v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
store i32 %v, i32 addrspace(1)* %out
@@ -124,9 +123,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr
; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
-; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
%v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
store i32 %v, i32 addrspace(1)* %out
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
index d369973d56dd4..b05bab15e2008 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s
@@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, 0.5, s3
v_permlane16_b32 v5, v1, -4.0, s3
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00]
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
v_permlane16_b32 v5, v1, s2, s103
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01]
@@ -12830,6 +12833,12 @@ v_permlane16_b32 v5, v1, s2, 0.5
v_permlane16_b32 v5, v1, s2, -4.0
// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03]
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0]
// GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00]
@@ -12923,6 +12932,9 @@ v_permlanex16_b32 v5, v1, 0.5, s3
v_permlanex16_b32 v5, v1, -4.0, s3
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00]
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
v_permlanex16_b32 v5, v1, s2, s103
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01]
@@ -12956,6 +12968,12 @@ v_permlanex16_b32 v5, v1, s2, 0.5
v_permlanex16_b32 v5, v1, s2, -4.0
// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03]
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0]
// GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 693e12fd01b6d..991ef34807e85 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -5287,6 +5287,15 @@ v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0]
v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01]
+v_permlane16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_permlanex16_b32 v5, v1, s2, s3
// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00]
@@ -5323,6 +5332,15 @@ v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0]
v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01]
+v_permlanex16_b32 v5, v1, 0xaf123456, s3
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, s2, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678
+// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+
v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index 66ce6b8b94fab..0785ba2ea2eb6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -16044,6 +16044,9 @@
# GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00]
0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00
+# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03]
0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03
@@ -16071,6 +16074,9 @@
# GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00
+# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00
@@ -16149,6 +16155,9 @@
# GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00]
0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00
+# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03]
0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03
@@ -16176,6 +16185,9 @@
# GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00
+# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf]
+0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf
+
# GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00]
0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00
More information about the llvm-commits mailing list