[llvm] [AMDGPU][True16][MC] true16 for v_cmpx_lt_f16 (PR #122936)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 14 12:51:04 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

True16 format for v_cmpx_lt_f16. Update VOPCX t16 and fake16 pseudo.

---

Patch is 174.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122936.diff


29 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+103-26) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s (+37-28) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s (+17-8) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s (+10-4) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s (+45-30) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s (+37-28) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s (+15-6) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s (+31-12) 
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s (+31-12) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s (+10-4) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s (+41-32) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s (+21-12) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s (+42-30) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s (+34-28) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s (+12-6) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s (+31-12) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s (+31-12) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt (+44-18) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt (+24-8) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt (+16-6) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt (+74-19) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt (+56-18) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt (+32-6) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt (+26-6) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt (+104-19) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt (+29-9) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt (+66-19) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt (+48-18) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt (+24-6) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 14e34c9e00ec6f..8542383a090b23 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -153,8 +153,7 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
   let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                      Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
-  let AsmVOP3Base = !if(Src0VT.isFP, "$src0_modifiers, $src1_modifiers$clamp",
-                                     "$src0, $src1");
+  let HasDst = 0;
   let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
   let EmitDst = 0;
 }
@@ -164,23 +163,53 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
   def _t16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
     let IsTrue16 = 1;
     let IsRealTrue16 = 1;
-    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-    let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-    let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+    let HasOpSel = 1;
+    let HasModifiers = 1; // All instructions at least have OpSel
+    let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+    let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+    let Src0VOP3DPP = VGPRSrc_16;
+    let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
     let IsTrue16 = 1;
+    let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-    let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-    let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+    let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+    let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+    let Src0VOP3DPP = VGPRSrc_32;
+    let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
@@ -1419,6 +1448,13 @@ class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
   let Inst{14} = 0;
 }
 
+class VOPC64_DPP16_NoDst_t16<bits<10> op, VOP_DPP_Pseudo ps,
+                         string opName = ps.OpName>
+    : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
+  let Inst{7-0} = ? ;
+  let Inst{14} = 0;
+}
+
 class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
     : VOP3_DPP8_Base<opName, ps.Pfl> {
   Instruction Opcode = !cast<Instruction>(NAME);
@@ -1452,6 +1488,13 @@ class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
   let Constraints = "";
 }
 
+class VOPC64_DPP8_NoDst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+    : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
+  let Inst{7-0} = ? ;
+  let Inst{14} = 0;
+  let Constraints = "";
+}
+
 //===----------------------------------------------------------------------===//
 // Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
@@ -1619,7 +1662,7 @@ multiclass VOPCX_Real<GFXGen Gen, bits<9> op> {
                         # " " # ps32.AsmOperands;
     }
     def _e64#Gen.Suffix :
-      VOP3_Real<ps64, Gen.Subtarget>,
+      VOP3_Real_Gen<ps64, Gen>,
       VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
         let Inst{7-0} = ?; // sdst
         let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
@@ -1677,11 +1720,22 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
             VOPCe<op{7-0}> {
         let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
       }
-      def _e64#Gen.Suffix
-          : VOP3_Real_Gen<ps64, Gen, asm_name>,
-            VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
-        let Inst{7-0} = ? ; // sdst
-        let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+
+      if ps64.Pfl.IsRealTrue16 then {
+        def _e64#Gen.Suffix
+            : VOP3_Real_Gen<ps64, Gen, asm_name>,
+              VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
+          let Inst{7-0} = ? ; // sdst
+          let Inst{14} = 0;
+          let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+        }
+      } else {
+        def _e64#Gen.Suffix
+            : VOP3_Real_Gen<ps64, Gen, asm_name>,
+              VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+          let Inst{7-0} = ? ; // sdst
+          let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+        }
       }
 
       defm : VOPCXInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1695,14 +1749,25 @@ multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
       if ps64.Pfl.HasExtVOP3DPP then {
         defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
         defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
-        def _e64_dpp#Gen.Suffix
-            : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
-              SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
-          let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
-        }
         defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
-        def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
-          let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+        if ps64.Pfl.IsRealTrue16 then {
+          def _e64_dpp#Gen.Suffix
+              : VOPC64_DPP16_NoDst_t16<{0, op}, psDPP, asm_name>,
+                SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+            let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
+          }
+          def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst_t16<{0, op}, ps64, asm_name> {
+            let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+          }
+        } else {
+          def _e64_dpp#Gen.Suffix
+              : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
+                SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+            let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
+          }
+          def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
+            let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+          }
         }
       } // End if ps64.Pfl.HasExtVOP3DPP
     } // End DecoderNamespace
@@ -1756,11 +1821,23 @@ multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
     string OpName = NAME, string pseudo_mnemonic = ""> :
   VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
 
+multiclass VOPCX_Real_t16_and_fake16_gfx11<bits<9> op, string asm_name,
+    string OpName = NAME, string pseudo_mnemonic = ""> {
+  defm _t16: VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+  defm _fake16: VOPCX_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
 multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
     string OpName = NAME, string pseudo_mnemonic = ""> :
   VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
   VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
 
+multiclass VOPCX_Real_t16_and_fake16_gfx11_gfx12<bits<9> op, string asm_name,
+    string OpName = NAME, string pseudo_mnemonic = ""> {
+  defm _t16: VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+  defm _fake16: VOPCX_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
 defm V_CMP_F_F16_fake16      : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
 defm V_CMP_LT_F16            : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
 defm V_CMP_EQ_F16_fake16     : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1848,7 +1925,7 @@ defm V_CMP_CLASS_F32     : VOPC_Real_gfx11_gfx12<0x07e>;
 defm V_CMP_CLASS_F64     : VOPC_Real_gfx11_gfx12<0x07f>;
 
 defm V_CMPX_F_F16_fake16     : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
+defm V_CMPX_LT_F16           : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
 defm V_CMPX_EQ_F16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
 defm V_CMPX_LE_F16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
 defm V_CMPX_GT_F16_fake16    : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 462ad7ba6516d5..60ec94446235ed 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -1347,47 +1347,56 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
 v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
-v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
 
-v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
 
-v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
 
 v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
index 46f1db837b0dd8..fb2b28874bd04f 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
@@ -326,17 +326,26 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
 
-v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmpx_lt_f16_e64_dpp -|v25...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/122936


More information about the llvm-commits mailing list