[llvm] ab927a9 - [AMDGPU] Fix operand class of v_ldexp_f16 src1

Mon Jun 19 07:43:39 PDT 2023

Author: Joe Nash
Date: 2023-06-19T10:43:10-04:00
New Revision: ab927a998397c30fd3426d9aa1e071206fa554b2

URL: https://github.com/llvm/llvm-project/commit/ab927a998397c30fd3426d9aa1e071206fa554b2
DIFF: https://github.com/llvm/llvm-project/commit/ab927a998397c30fd3426d9aa1e071206fa554b2.diff

LOG: [AMDGPU] Fix operand class of v_ldexp_f16 src1

Patch eece6ba283bd changed the src1 type of v_ldexp_f16 from i32 to
i16. Although src1 is semantically an i16, the hardware reads this operand
as an f16 type, which primarily enables floating point inline constants.
Therefore this patch changes the operand type to f16. It maintains the
current behavior where floating point source modifiers are not allowed
on src1. The SDWA sext modifier continues to be allowed.
The asm and disasm test changes made in eece6ba283bd are reverted,
because floating point inline constants are allowed.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D153169

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
    llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
    llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
    llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
    llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
    llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt
    llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
    llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 35429a44c93e3..ea7ab163992ea 100644

--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -863,9 +863,18 @@ def :  divergent_i64_BinOp <xor, V_XOR_B32_e64>;
 // 16-Bit Operand Instructions
 //===----------------------------------------------------------------------===//
 
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I16> {
-  // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware
-  // encoding treats src1 as an f16
+// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
+// encoding treats src1 as an f16
+def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
+  let Src1Mod = Int32InputMods;
+  let Src1ModDPP = IntVRegInputMods;
+  let Src1ModVOP3DPP = IntVRegInputMods;
+  // SDWA sext is the only modifier allowed.
+  let HasSrc1IntMods = 1;
+  let HasSrc1FloatMods = 0;
+  let Src1ModSDWA = Int16SDWAInputMods;
+}
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
   let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
   let Src1DPP = VGPR_32_Lo128;
   let Src1ModDPP = IntT16VRegInputMods;
@@ -874,9 +883,9 @@ def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I16> {
 let isReMaterializable = 1 in {
 let FPDPRounding = 1 in {
   let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts]  in
-    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16, any_fldexp>;
+    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
   let SubtargetPredicate = HasTrue16BitInsts in
-    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, any_fldexp>;
+    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
 } // End FPDPRounding = 1
 // FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
 defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -899,6 +908,21 @@ defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
 } // End isCommutable = 1
 } // End isReMaterializable = 1
 
+class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
+  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
+  (inst $src0_modifiers, $src0,
+        $src1_modifiers, $src1,
+        $clamp, /* clamp */
+        $omod /* omod */)
+>;
+
+let OtherPredicates = [NotHasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
+
+let OtherPredicates = [HasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+
 let SubtargetPredicate = isGFX11Plus in {
   let isCommutable = 1 in {
     defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;

diff  --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
index 6ea135c95d1f0..b1b54005bee28 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -12931,11 +12931,11 @@ v_ldexp_f16_e64 v5, v1, 0
 v_ldexp_f16_e64 v5, v1, -1
 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00]
 
-v_ldexp_f16_e64 v5, v1, 0x3800
-// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00]
+v_ldexp_f16_e64 v5, v1, 0.5
+// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00]
+// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, -v1, v2
 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x20]

diff  --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
index ffb83100c08ce..43c71617bb385 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
@@ -734,7 +734,7 @@ v_ldexp_f16_e64 v5, ttmp15, src_scc
 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00]
 
 v_ldexp_f16_e64 v5, m0, 0.5
-// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00]
 
 v_ldexp_f16_e64 v5, exec_lo, -1
 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00]

diff  --git a/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
index d46a97f1cbd58..d4c31f14d3bfc 100644
--- a/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s
@@ -1,5 +1,4 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding -filetype=null 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s
 
 v_interp_p1_f32_e64 v5, v2, attr0.x
 // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00]
@@ -12827,10 +12826,10 @@ v_ldexp_f16_e64 v5, v1, -1
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, 0.5
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, src_vccz
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00]

diff  --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
index 34d0d77c07f36..8781a01f1eb8e 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s
@@ -1,5 +1,4 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s
 
 v_interp_p1_f32_e64 v5, v2, attr0.x
 // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00]
@@ -11240,10 +11239,10 @@ v_ldexp_f16_e64 v5, v1, -1
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, 0.5
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, -4.0
-// ERR: [[@LINE-1]]:25: error: literal operands are not supported
+// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 
 v_ldexp_f16_e64 v5, v1, src_vccz
 // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index c5cd6f6c4dcea..0785ba2ea2eb6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -7520,13 +7520,13 @@
 # GFX10: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00
 
-# GFX10: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00]
+# GFX10: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00
 
 # GFX10: v_ldexp_f16_e64 v5, v1, 0               ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00
 
-# GFX10: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00]
+# GFX10: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00
 
 # GFX10: v_ldexp_f16_e64 v5, v1, exec_hi         ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x00,0x00]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt
index 4e430a88a1cb4..3141e8f4b2cbb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt
@@ -543,7 +543,7 @@
 # GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc     ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00
 
-# GFX11: v_ldexp_f16_e64 v5, m0, 0x3800          ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# GFX11: v_ldexp_f16_e64 v5, m0, 0.5             ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00]
 0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00
 
 # GFX11: v_ldexp_f16_e64 v5, exec_lo, -1         ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
index a0277c706ee23..2b07d620fdad2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt
@@ -11178,10 +11178,10 @@
 # CHECK: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00
 
 # CHECK: v_ldexp_f16_e64 v5, -v1, v2             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
index c2ac84b44e0ca..e3ed9778f6fb4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -8814,10 +8814,10 @@
 # CHECK: v_ldexp_f16_e64 v5, v1, -1              ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, 0.5             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00
 
-# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400          ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00]
+# CHECK: v_ldexp_f16_e64 v5, v1, -4.0            ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00]
 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00
 
 # CHECK: v_ldexp_f16_e64 v5, -v1, v2             ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20]