[llvm] 8de1f04 - [AMDGPU] gfx11 Fix VOP3 dot instructions
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 22 02:48:45 PDT 2022
Author: Petar Avramovic
Date: 2022-07-22T11:43:35+02:00
New Revision: 8de1f04c77af1a6e348157073d9924439b71bf6f
URL: https://github.com/llvm/llvm-project/commit/8de1f04c77af1a6e348157073d9924439b71bf6f
DIFF: https://github.com/llvm/llvm-project/commit/8de1f04c77af1a6e348157073d9924439b71bf6f.diff
LOG: [AMDGPU] gfx11 Fix VOP3 dot instructions
Fix src modifiers for operands with bf16 type.
op_sel[0:1] are ignored.
Differential Revision: https://reviews.llvm.org/D129084
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
llvm/test/MC/AMDGPU/gfx11_asm_dpp16.s
llvm/test/MC/AMDGPU/gfx11_asm_dpp8.s
llvm/test/MC/AMDGPU/gfx11_vop123.s
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1b5565ff58489..2a9393fc15957 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4169,7 +4169,9 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
return false;
}
- if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
+ uint64_t TSFlags = MII.get(Opc).TSFlags;
+
+ if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx != -1) {
if (Inst.getOperand(OpSelIdx).getImm() != 0)
@@ -4182,6 +4184,15 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
}
}
+ // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
+ if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
+ !(TSFlags & SIInstrFlags::VOP3P)) {
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ if (OpSel & 3)
+ return false;
+ }
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 7788f1d82273d..a911483cade59 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -760,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
- // FIXME VOP3 DPP versions are unsupported
- let HasExtVOP3DPP = 0;
let HasClamp = 0;
let HasOMod = 0;
- let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp, HasOMod,
- !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret;
+ // Override modifiers for bf16(i16) (same as float modifiers).
+ let HasSrc0Mods = 1;
+ let HasSrc1Mods = 1;
+ let HasSrc2Mods = 1;
+ let Src0ModDPP = FPVRegInputMods;
+ let Src1ModDPP = FPVRegInputMods;
+ let Src2ModVOP3DPP = FPVRegInputMods;
+ let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasClamp, HasOMod, FPVRegInputMods,
+ FPVRegInputMods, FPVRegInputMods>.ret;
+ let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret;
}
let SubtargetPredicate = isGFX11Plus in {
@@ -784,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
} // End SubtargetPredicate = isGFX11Plus
-let SubtargetPredicate = HasDot8Insts in {
+let SubtargetPredicate = HasDot8Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
}
@@ -909,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
-// FIXME VOP3 DPP Dot instructions are unsupported
-defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>;
-defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>;
+defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>;
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 6da537ff5117c..b65ca2d6b1b38 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -269,6 +269,10 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+class VOP3DotOpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11<op, p>{
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+}
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
@@ -1271,6 +1275,7 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f
class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP<op, opName, ps.Pfl, 1> {
let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1287,6 +1292,7 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP8<op, opName, ps.Pfl> {
let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1328,6 +1334,15 @@ let AssemblerPredicate = isGFX11Only,
VOP3e_gfx11<op, ps.Pfl>;
}
}
+ multiclass VOP3Dot_Real_Base_gfx11<bits<10> op, string opName = NAME,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3DotOpSel_gfx11<op, ps.Pfl>;
+ }
+ }
multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
@@ -1357,6 +1372,15 @@ let AssemblerPredicate = isGFX11Only,
let DecoderNamespace = "DPPGFX11";
}
}
+
+ multiclass VOP3Dot_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> {
+ def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> {
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+
multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1370,6 +1394,16 @@ let AssemblerPredicate = isGFX11Only,
let DecoderNamespace = "DPP8GFX11";
}
}
+
+ multiclass VOP3Dot_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> {
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+
multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1408,6 +1442,12 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op,
VOP3_Real_dpp_Base_gfx11<op, opName>,
VOP3_Real_dpp8_Base_gfx11<op, opName>;
+multiclass VOP3Dot_Realtriple_gfx11<bits<10> op,
+ bit isSingle = 0, string opName = NAME> :
+ VOP3Dot_Real_Base_gfx11<op, opName, isSingle>,
+ VOP3Dot_Real_dpp_Base_gfx11<op, opName>,
+ VOP3Dot_Real_dpp8_Base_gfx11<op, opName>;
+
multiclass VOP3Only_Realtriple_gfx11<bits<10> op> :
VOP3_Realtriple_gfx11<op, 1>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
index e48fc945d049f..295684a77e972 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,SDAG-GFX11
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11
declare i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a, <2 x i16> %b, i16 %c)
-define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16(
-; GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16:
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16(
+; GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
@@ -30,3 +30,47 @@ entry:
store i16 %r.val, i16 addrspace(1)* %r
ret void
}
+
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16_dpp(
+; SDAG-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_dpp:
+; SDAG-GFX11: ; %bb.0: ; %entry
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: scratch_load_b32 v0, off, s2
+; SDAG-GFX11-NEXT: scratch_load_u16 v1, off, s3
+; SDAG-GFX11-NEXT: scratch_load_b32 v2, off, s1
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0)
+; SDAG-GFX11-NEXT: v_dot2_bf16_bf16_e64_dpp v0, v2, v0, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; SDAG-GFX11-NEXT: scratch_store_b16 off, v0, s0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_dpp:
+; GISEL-GFX11: ; %bb.0: ; %entry
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: scratch_load_b32 v0, off, s1
+; GISEL-GFX11-NEXT: scratch_load_b32 v1, off, s2
+; GISEL-GFX11-NEXT: scratch_load_u16 v2, off, s3
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0)
+; GISEL-GFX11-NEXT: v_dot2_bf16_bf16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+ i16 addrspace(5)* %r,
+ <2 x i16> addrspace(5)* %a,
+ <2 x i16> addrspace(5)* %b,
+ i16 addrspace(5)* %c) {
+entry:
+ %a.val = load <2 x i16>, <2 x i16> addrspace(5)* %a
+ %b.val = load <2 x i16>, <2 x i16> addrspace(5)* %b
+ %c.val = load i16, i16 addrspace(5)* %c
+ %a.val.i32 = bitcast <2 x i16> %a.val to i32
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1)
+ %a.val.dpp.v2i16 = bitcast i32 %dpp to <2 x i16>
+ %r.val = call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a.val.dpp.v2i16, <2 x i16> %b.val, i16 %c.val)
+ store i16 %r.val, i16 addrspace(5)* %r
+ ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
index 15387bf7b6aaa..1be7a0fc7d471 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,SDAG-GFX11
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11
declare half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a, <2 x half> %b, half %c)
@@ -30,3 +30,47 @@ entry:
store half %r.val, half addrspace(1)* %r
ret void
}
+
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp(
+; SDAG-GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp:
+; SDAG-GFX11: ; %bb.0: ; %entry
+; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-GFX11-NEXT: scratch_load_b32 v0, off, s2
+; SDAG-GFX11-NEXT: scratch_load_u16 v1, off, s3
+; SDAG-GFX11-NEXT: scratch_load_b32 v2, off, s1
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0)
+; SDAG-GFX11-NEXT: v_dot2_f16_f16_e64_dpp v0, v2, v0, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; SDAG-GFX11-NEXT: scratch_store_b16 off, v0, s0
+; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; SDAG-GFX11-NEXT: s_endpgm
+;
+; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp:
+; GISEL-GFX11: ; %bb.0: ; %entry
+; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: scratch_load_b32 v0, off, s1
+; GISEL-GFX11-NEXT: scratch_load_b32 v1, off, s2
+; GISEL-GFX11-NEXT: scratch_load_u16 v2, off, s3
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0)
+; GISEL-GFX11-NEXT: v_dot2_f16_f16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0
+; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-GFX11-NEXT: s_endpgm
+ half addrspace(5)* %r,
+ <2 x half> addrspace(5)* %a,
+ <2 x half> addrspace(5)* %b,
+ half addrspace(5)* %c) {
+entry:
+ %a.val = load <2 x half>, <2 x half> addrspace(5)* %a
+ %b.val = load <2 x half>, <2 x half> addrspace(5)* %b
+ %c.val = load half, half addrspace(5)* %c
+ %a.val.i32 = bitcast <2 x half> %a.val to i32
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1)
+ %a.val.dpp.v2half = bitcast i32 %dpp to <2 x half>
+ %r.val = call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a.val.dpp.v2half, <2 x half> %b.val, half %c.val)
+ store half %r.val, half addrspace(5)* %r
+ ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_dpp16.s
index 5783666d65315..15604c591bea4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_dpp16.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR --implicit-check-not=error %s
v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00]
@@ -632,3 +633,27 @@ v_movrelsd_2_b32 v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
v_movrelsd_b32 v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// GFX11: encoding: [0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_dpp8.s
index 427511366a1bf..a198b85480d6b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_dpp8.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR --implicit-check-not=error %s
v_mov_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7]
// GFX11: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa]
@@ -519,3 +520,27 @@ v_subrev_nc_u32 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05]
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92]
+
diff --git a/llvm/test/MC/AMDGPU/gfx11_vop123.s b/llvm/test/MC/AMDGPU/gfx11_vop123.s
index 37d7a0349f645..536e448ca8a6a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_vop123.s
+++ b/llvm/test/MC/AMDGPU/gfx11_vop123.s
@@ -4271,15 +4271,29 @@ v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[3:4] div:2
v_dot2_f16_f16 v0, v1, v2, v3
// GFX11: encoding: [0x00,0x00,0x66,0xd6,0x01,0x05,0x0e,0x04]
+v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[1,1,0,0]
+// W32-ERR: error: invalid op_sel operand
+// W64-ERR: error: invalid op_sel operand
+
v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[0,0,1,1]
// GFX11: encoding: [0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04]
+v_dot2_f16_f16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1]
+// GFX11: encoding: [0x00,0x65,0x66,0xd6,0x01,0x05,0x0e,0xc4]
+
v_dot2_bf16_bf16 v0, v1, v2, v3
// GFX11: encoding: [0x00,0x00,0x67,0xd6,0x01,0x05,0x0e,0x04]
+v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[1,1,0,0]
+// W32-ERR: error: invalid op_sel operand
+// W64-ERR: error: invalid op_sel operand
+
v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[0,0,1,1]
// GFX11: encoding: [0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04]
+v_dot2_bf16_bf16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1]
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0x01,0x05,0x0e,0xc4]
+
v_dot2c_f32_f16_e32 v5, v1, v2
// GFX11: encoding: [0x01,0x05,0x0a,0x04]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
index 5ae54762dbe4a..ee0fd9e6c834a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt
@@ -14538,15 +14538,29 @@
# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x66,0xd6,0x01,0x05,0x0e,0x04]
0x00,0x00,0x66,0xd6,0x01,0x05,0x0e,0x04
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04]
+0x00,0x78,0x66,0xd6,0x01,0x05,0x0e,0x04
+
# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04]
0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04
+# GFX11: v_dot2_f16_f16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] ; encoding: [0x00,0x65,0x66,0xd6,0x01,0x05,0x0e,0xc4]
+0x00,0x65,0x66,0xd6,0x01,0x05,0x0e,0xc4
+
# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x67,0xd6,0x01,0x05,0x0e,0x04]
0x00,0x00,0x67,0xd6,0x01,0x05,0x0e,0x04
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04]
+0x00,0x78,0x67,0xd6,0x01,0x05,0x0e,0x04
+
# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04]
0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04
+# GFX11: v_dot2_bf16_bf16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] ; encoding: [0x00,0x65,0x67,0xd6,0x01,0x05,0x0e,0xc4]
+0x00,0x65,0x67,0xd6,0x01,0x05,0x0e,0xc4
+
# GFX11: v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
0x01,0x05,0x0a,0x04
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 9f8bffbae8708..0dd6f1153edbc 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -14098,3 +14098,29 @@
# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x78,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00]
+0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x78,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00]
+0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc4,0x01,0xe4,0x04,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index 35f94e7a488bd..2f12035bdb0df 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -5266,3 +5266,29 @@
# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x78,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92]
+0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# op_sel[1:0] are ignored
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x78,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92
+
+# GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92]
+0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc4,0x01,0x88,0x46,0x92
More information about the llvm-commits
mailing list