[llvm] 869fc7e - [AMDGPU][MC][MI100+] Enable VOP3 variants of dot2c/dot4c/dot8c opcodes

Dmitry Preobrazhensky via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 29 06:38:39 PST 2022


Author: Dmitry Preobrazhensky
Date: 2022-11-29T17:38:18+03:00
New Revision: 869fc7eabd2a5c0effe7f64857e6f68ba71f800a

URL: https://github.com/llvm/llvm-project/commit/869fc7eabd2a5c0effe7f64857e6f68ba71f800a
DIFF: https://github.com/llvm/llvm-project/commit/869fc7eabd2a5c0effe7f64857e6f68ba71f800a.diff

LOG: [AMDGPU][MC][MI100+] Enable VOP3 variants of dot2c/dot4c/dot8c opcodes

Differential Revision: https://reviews.llvm.org/D138494

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/MC/AMDGPU/xdl-insts-err.s
    llvm/test/MC/AMDGPU/xdl-insts-gfx908.s
    llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 0b4f2c6b59b66..ba5d62e81fc5f 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4242,8 +4242,8 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
   }
 
   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
-  if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
-      !(TSFlags & SIInstrFlags::VOP3P)) {
+  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
+      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
     if (OpSel & 3)
@@ -8228,17 +8228,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
   // it has src2 register operand that is tied to dst operand
   // we don't allow modifiers for this operand in assembler so src2_modifiers
   // should be 0.
-  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
-      Opc == AMDGPU::V_MAC_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F32_e64_vi ||
-      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
-      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
-      Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
-      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
-      Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi ||
-      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
-      Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
-      Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
-      Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11) {
+  if (isMAC(Opc)) {
     auto it = Inst.begin();
     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 3e189e2cc2f38..ef084cf74a975 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -599,20 +599,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
   } while (false);
 
-  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
-              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
-              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
-              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
-              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
-              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx11 ||
-              MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
-              MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F16_t16_e64_gfx11)) {
+  if (Res && AMDGPU::isMAC(MI.getOpcode())) {
     // Insert dummy unused src2_modifiers.
     insertNamedMCOperand(MI, MCOperand::createImm(0),
                          AMDGPU::OpName::src2_modifiers);

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 97948168c9e7d..6c519939507a2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -434,6 +434,27 @@ bool isVOPD(unsigned Opc) {
   return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
 }
 
+bool isMAC(unsigned Opc) {
+  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
+         Opc == AMDGPU::V_MAC_F32_e64_vi ||
+         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
+         Opc == AMDGPU::V_MAC_F16_e64_vi ||
+         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
+         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
+         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
+         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
+         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
+         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
+         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
+         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
+         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
+         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
+}
+
 bool isTrue16Inst(unsigned Opc) {
   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
   return Info ? Info->IsTrue16 : false;

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 0f297823ced0f..71db4261247bd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -509,6 +509,9 @@ int getVOPDFull(unsigned OpX, unsigned OpY);
 LLVM_READONLY
 bool isVOPD(unsigned Opc);
 
+LLVM_READNONE
+bool isMAC(unsigned Opc);
+
 namespace VOPD {
 
 enum Component : unsigned {

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index c98d3b9702355..4eb0f5ec6c319 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -509,12 +509,22 @@ class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
 def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
   let Src0ModDPP = FPVRegInputMods;
   let Src1ModDPP = FPVRegInputMods;
+  let HasClamp = 1;
 }
 
 def VOP_DOT_ACC_I32_I32   : VOP_DOT_ACC<i32, i32> {
   let HasExtVOP3DPP = 0;
   let HasSrc0Mods = 1;
   let HasSrc1Mods = 1;
+  let HasClamp = 1;
+
+  let Src0Mod = Int32InputMods;
+  let Src1Mod = Int32InputMods;
+  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
+                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
+                       1 /*HasSrc2Mods*/, HasOMod,
+                       Src0Mod, Src1Mod, Src2Mod>.ret;
+  let Asm64 = "$vdst, $src0, $src1$clamp";
 }
 
 // Write out to vcc or arbitrary SGPR.
@@ -2281,7 +2291,7 @@ defm V_FMAMK_F32        : VOP2_Real_MADK_gfx940 <0x17>;
 defm V_FMAAK_F32        : VOP2_Real_MADK_gfx940 <0x18>;
 }
 
-multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
+multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
   def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
 }
 

diff  --git a/llvm/test/MC/AMDGPU/xdl-insts-err.s b/llvm/test/MC/AMDGPU/xdl-insts-err.s
index 21f157f0896bb..03b37bcc7a3f5 100644
--- a/llvm/test/MC/AMDGPU/xdl-insts-err.s
+++ b/llvm/test/MC/AMDGPU/xdl-insts-err.s
@@ -5,7 +5,6 @@
 v_dot2c_f32_f16 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
-// GFX908-ERR: error: e64 variant of this instruction is not supported
 v_dot2c_f32_f16_e64 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
@@ -16,7 +15,6 @@ v_dot2c_f32_f16_sdwa v0, v1, v2
 v_dot2c_i32_i16 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
-// GFX908-ERR: error: e64 variant of this instruction is not supported
 v_dot2c_i32_i16_e64 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
@@ -27,7 +25,6 @@ v_dot2c_i32_i16_sdwa v0, v1, v2
 v_dot4c_i32_i8 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
-// GFX908-ERR: error: e64 variant of this instruction is not supported
 v_dot4c_i32_i8_e64 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
@@ -38,7 +35,6 @@ v_dot4c_i32_i8_sdwa v0, v1, v2
 v_dot8c_i32_i4 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU
-// GFX908-ERR: error: e64 variant of this instruction is not supported
 v_dot8c_i32_i4_e64 v0, v1, v2
 
 // GFX906-ERR: error: instruction not supported on this GPU

diff  --git a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s
index 36ec8404da646..22a1f1316ea24 100644
--- a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s
+++ b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s
@@ -1,4 +1,6 @@
 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck %s
 
 // CHECK: encoding: [0x01,0x05,0x0a,0x6e]
 v_dot2c_f32_f16 v5, v1, v2
@@ -102,6 +104,27 @@ v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // CHECK: encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00]
 v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 
+// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00]
+v_dot2c_f32_f16_e64 v5, v1, src_scc
+
+// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00]
+v_dot2c_f32_f16_e64 v5, v255, src_execz
+
+// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00]
+v_dot2c_f32_f16_e64 v5, s101, s101
+
+// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00]
+v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo
+
+// CHECK: encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40]
+v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi|
+
+// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10]
+v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4
+
+// CHECK: encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38]
+v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2
+
 // CHECK: encoding: [0x01,0x05,0x0a,0x70]
 v_dot2c_i32_i16 v5, v1, v2
 
@@ -192,6 +215,27 @@ v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
 // CHECK: encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00]
 v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
 
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00]
+v_dot2c_i32_i16_e64 v5, v1, src_scc
+
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00]
+v_dot2c_i32_i16_e64 v5, v255, src_execz
+
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00]
+v_dot2c_i32_i16_e64 v5, s101, s101
+
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00]
+v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo
+
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00]
+v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi
+
+// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00]
+v_dot2c_i32_i16_e64 v5, src_execz, 0.5
+
+// CHECK: encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00]
+v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp
+
 // CHECK: encoding: [0x01,0x05,0x0a,0x72]
 v_dot4c_i32_i8 v5, v1, v2
 
@@ -282,6 +326,27 @@ v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
 // CHECK: encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00]
 v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
 
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00]
+v_dot4c_i32_i8_e64 v5, v1, src_scc
+
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00]
+v_dot4c_i32_i8_e64 v5, v255, src_execz
+
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00]
+v_dot4c_i32_i8_e64 v5, s101, s101
+
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00]
+v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo
+
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00]
+v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi
+
+// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00]
+v_dot4c_i32_i8_e64 v5, src_execz, 0.5
+
+// CHECK: encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00]
+v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp
+
 // CHECK: encoding: [0x01,0x05,0x0a,0x74]
 v_dot8c_i32_i4 v5, v1, v2
 
@@ -372,6 +437,27 @@ v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
 // CHECK: encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00]
 v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
 
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00]
+v_dot8c_i32_i4_e64 v5, v1, src_scc
+
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00]
+v_dot8c_i32_i4_e64 v5, v255, src_execz
+
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00]
+v_dot8c_i32_i4_e64 v5, s101, s101
+
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00]
+v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo
+
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00]
+v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi
+
+// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00]
+v_dot8c_i32_i4_e64 v5, src_execz, 0.5
+
+// CHECK: encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00]
+v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp
+
 // CHECK: encoding: [0x01,0x05,0x0a,0x78]
 v_pk_fmac_f16 v5, v1, v2
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt
index c74e5964706b2..b643007015d47 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt
@@ -1,4 +1,6 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -disassemble -show-encoding < %s | FileCheck %s
 
 # CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6e]
 0x01,0x05,0x0a,0x6e
@@ -96,6 +98,27 @@
 # CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00]
 0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00
 
+# CHECK: v_dot2c_f32_f16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00]
+0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00
+
+# CHECK: v_dot2c_f32_f16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00]
+0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00
+
+# CHECK: v_dot2c_f32_f16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00]
+0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00
+
+# CHECK: v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00]
+0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00
+
+# CHECK: v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi| ; encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40]
+0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40
+
+# CHECK: v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4 ; encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10]
+0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10
+
+# CHECK: v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2 ; encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38]
+0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38
+
 # CHECK: v_dot2c_i32_i16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x70]
 0x01,0x05,0x0a,0x70
 
@@ -180,6 +203,27 @@
 # CHECK: v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00]
 0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00
 
+# CHECK: v_dot2c_i32_i16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00]
+0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00]
+0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00]
+0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00]
+0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00]
+0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00]
+0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00
+
+# CHECK: v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00]
+0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00
+
 # CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72]
 0x01,0x05,0x0a,0x72
 
@@ -264,6 +308,27 @@
 # CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00]
 0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00
 
+# CHECK: v_dot4c_i32_i8_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00]
+0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00]
+0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00]
+0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00]
+0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00]
+0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00]
+0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00
+
+# CHECK: v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00]
+0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00
+
 # CHECK: v_dot8c_i32_i4_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74]
 0x01,0x05,0x0a,0x74
 
@@ -348,6 +413,27 @@
 # CHECK: v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00]
 0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00
 
+# CHECK: v_dot8c_i32_i4_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00]
+0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00]
+0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00]
+0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00]
+0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00]
+0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00]
+0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00
+
+# CHECK: v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00]
+0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00
+
 # CHECK: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78]
 0x01,0x05,0x0a,0x78
 


        


More information about the llvm-commits mailing list