[llvm] 8f8e4e3 - [AMDGPU][MC][GFX11] Correct v_fmac_.*_e64_dpp

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 7 06:22:11 PDT 2022


Author: Dmitry Preobrazhensky
Date: 2022-10-07T16:21:55+03:00
New Revision: 8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509

URL: https://github.com/llvm/llvm-project/commit/8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509
DIFF: https://github.com/llvm/llvm-project/commit/8f8e4e3b38c4fdb2bc5336bf324af14c2ab61509.diff

LOG: [AMDGPU][MC][GFX11] Correct v_fmac_.*_e64_dpp

Differential Revision: https://reviews.llvm.org/D134961

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
    llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
    llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
    llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index db2e331ddd37..a4e9871fd4e1 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8863,18 +8863,43 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
 }
 
-void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
+void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
+                                 bool IsDPP8) {
   OptionalImmIndexMap OptionalIdx;
   unsigned Opc = Inst.getOpcode();
-  bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
-  unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+  bool HasModifiers =
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
+
+  // MAC instructions are special because they have 'old'
+  // operand which is not tied to dst (but assumed to be).
+  // They also have dummy unused src2_modifiers.
+  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
+  int Src2ModIdx =
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
+               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
+
+  unsigned I = 1;
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
   }
 
   int Fi = 0;
   for (unsigned E = Operands.size(); I != E; ++I) {
+
+    if (IsMAC) {
+      int NumOperands = Inst.getNumOperands();
+      if (OldIdx == NumOperands) {
+        // Handle old operand
+        constexpr int DST_IDX = 0;
+        Inst.addOperand(Inst.getOperand(DST_IDX));
+      } else if (Src2ModIdx == NumOperands) {
+        // Add unused dummy src2_modifiers
+        Inst.addOperand(MCOperand::createImm(0));
+      }
+    }
+
     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                             MCOI::TIED_TO);
     if (TiedTo != -1) {

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fdfb2beb16e9..7e11cd42c806 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -794,37 +794,73 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
   return Modifiers;
 }
 
+// MAC opcodes have special old and src2 operands.
+// src2 is tied to dst, while old is not tied (but assumed to be).
+bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
+  constexpr int DST_IDX = 0;
+  auto Opcode = MI.getOpcode();
+  const auto &Desc = MCII->get(Opcode);
+  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
+
+  if (OldIdx != -1 && Desc.getOperandConstraint(
+                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
+    assert(AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2) != -1);
+    assert(Desc.getOperandConstraint(
+               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
+               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
+    return true;
+  }
+
+  return false;
+}
+
+// Create dummy old operand and insert dummy unused src2_modifiers
+void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
+  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
+  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
+  insertNamedMCOperand(MI, MCOperand::createImm(0),
+                       AMDGPU::OpName::src2_modifiers);
+}
+
 // We must check FI == literal to reject not genuine dpp8 insts, and we must
 // first add optional MI operands to check FI
 DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
   unsigned Opc = MI.getOpcode();
-  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
   if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
     convertVOP3PDPPInst(MI);
   } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
              AMDGPU::isVOPC64DPP(Opc)) {
     convertVOPCDPPInst(MI);
-  } else if (MI.getNumOperands() < DescNumOps &&
-             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
-    auto Mods = collectVOPModifiers(MI);
-    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
-                         AMDGPU::OpName::op_sel);
   } else {
-    // Insert dummy unused src modifiers.
-    if (MI.getNumOperands() < DescNumOps &&
-        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
-      insertNamedMCOperand(MI, MCOperand::createImm(0),
-                           AMDGPU::OpName::src0_modifiers);
+    if (isMacDPP(MI))
+      convertMacDPPInst(MI);
 
+    unsigned DescNumOps = MCII->get(Opc).getNumOperands();
     if (MI.getNumOperands() < DescNumOps &&
-        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
-      insertNamedMCOperand(MI, MCOperand::createImm(0),
-                           AMDGPU::OpName::src1_modifiers);
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+      auto Mods = collectVOPModifiers(MI);
+      insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+                           AMDGPU::OpName::op_sel);
+    } else {
+      // Insert dummy unused src modifiers.
+      if (MI.getNumOperands() < DescNumOps &&
+          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+        insertNamedMCOperand(MI, MCOperand::createImm(0),
+                             AMDGPU::OpName::src0_modifiers);
+
+      if (MI.getNumOperands() < DescNumOps &&
+          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+        insertNamedMCOperand(MI, MCOperand::createImm(0),
+                             AMDGPU::OpName::src1_modifiers);
+    }
   }
   return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
 }
 
 DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
+  if (isMacDPP(MI))
+    convertMacDPPInst(MI);
+
   unsigned Opc = MI.getOpcode();
   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
   if (MI.getNumOperands() < DescNumOps &&

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index e987778d667b..d0aef9cdf79d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -165,6 +165,7 @@ class AMDGPUDisassembler : public MCDisassembler {
   DecodeStatus convertVOP3DPPInst(MCInst &MI) const;
   DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
   DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
+  void convertMacDPPInst(MCInst &MI) const;
 
   MCOperand decodeOperand_VGPR_32(unsigned Val) const;
   MCOperand decodeOperand_VGPR_32_Lo128(unsigned Val) const;
@@ -260,6 +261,8 @@ class AMDGPUDisassembler : public MCDisassembler {
   bool isGFX11Plus() const;
 
   bool hasArchitectedFlatScratch() const;
+
+  bool isMacDPP(MCInst &MI) const;
 };
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 5710e06f6632..98bc16464a56 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -441,6 +441,11 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
   let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
   let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
   let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
+  let AsmVOP3DPPBase =
+      getAsmVOP3DPPBase<2 /*NumSrcArgs*/, HasDst, HasClamp,
+                        HasOpSel, HasOMod, IsVOP3P, HasModifiers,
+                        HasModifiers, HasModifiers,
+                        0 /*Src2HasMods*/, DstVT>.ret;
   let HasSrc2 = 0;
   let HasSrc2Mods = 0;
 

diff  --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 52198e9f2acf..0ae183f50487 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -9157,6 +9157,90 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x
 v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
 // GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
 
+v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+
+v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+
 v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
 // GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
 

diff  --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index bf96f7e648d4..e9e5321a0ade 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -3957,6 +3957,30 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
 v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
 // GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
 
+v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
+v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
 v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 938440641641..5f55d30709c8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -7623,6 +7623,90 @@
 # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
 0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01
+
+# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13]
+0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13
+
+# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01
+
+# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13]
+0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13
+
+# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30]
+0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30
+
 # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index 95403ef155a1..e883b565942e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -3003,6 +3003,30 @@
 # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
 
+# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00
+
+# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05
+
+# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00
+
 # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
 


        


More information about the llvm-commits mailing list