[clang] [llvm] [AMDGPU] Add dot4 fp8/bf8 instructions for gfx1170 (PR #180516)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 9 04:59:51 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko BrkuĊĦanin (mbrkusanin)
<details>
<summary>Changes</summary>
---
Patch is 49.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180516.diff
15 Files Affected:
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+2-1)
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+5-5)
- (modified) llvm/lib/TargetParser/TargetParser.cpp (+1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll (+126-2)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s (+122)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp16.s (+25)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp16_err.s (+24)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp8.s (+25)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_dpp8_err.s (+27)
- (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3p_err.s (+133)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p.txt (+122)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_dpp16.txt (+25)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_dpp8.txt (+25)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3p_err.txt (+144)
``````````diff
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl
index 2eb8b6d5f1069..576fb475f883d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl
@@ -1,5 +1,6 @@
// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
typedef unsigned int uint;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 9d723c86031f2..723b064afd64e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1885,7 +1885,8 @@ def FeatureISAVersion11_7_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureFP8ConversionInsts])>;
+ FeatureFP8ConversionInsts,
+ FeatureDot11Insts])>;
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 256dd0bb027ad..063546e1a5bc2 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -693,7 +693,6 @@ def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>,
multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
defm NAME : VOP3PInst<OpName, VOP3P_DOTF8_Profile, null_frag, 1>;
- let SubtargetPredicate = isGFX12Plus in
def : GCNPat <(intrinsic_node i32:$src0, i32:$src1,
(VOP3Mods f32:$src2, i32:$src2_modifiers)),
(!cast<Instruction>(NAME) i32:$src0, i32:$src1,
@@ -2493,6 +2492,11 @@ multiclass VOP3P_Realtriple<GFXGen Gen, bits<8> op, string backing_ps_name = NAM
multiclass VOP3P_Realtriple_gfx11_gfx12<bits<8> op>
: VOP3P_Realtriple<GFX11Gen, op>, VOP3P_Realtriple<GFX12Gen, op>;
+defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple_gfx11_gfx12<0x24>;
+defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple_gfx11_gfx12<0x25>;
+defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple_gfx11_gfx12<0x26>;
+defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple_gfx11_gfx12<0x27>;
+
//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//
@@ -2546,10 +2550,6 @@ def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">;
defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;
-defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x24>;
-defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
-defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
-defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;
//===----------------------------------------------------------------------===//
// GFX11
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index d317ca4e1194a..fc5d7519bdffe 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -519,6 +519,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
break;
case GK_GFX1170:
// TODO-GFX1170: Update features map for gfx1170
+ Features["dot11-insts"] = true;
Features["fp8-conversion-insts"] = true;
[[fallthrough]];
case GK_GFX1153:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
index 796f6b84b1d14..5891456364fa7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
@@ -1,8 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GCN,GFX1170 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GCN,GFX1170 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -18,6 +26,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_bf8_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -34,6 +48,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_bf8_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -50,6 +70,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -67,6 +93,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -84,6 +116,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_fp8(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -99,6 +137,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_fp8_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -115,6 +159,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_fp8_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -131,6 +181,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -148,6 +204,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -165,6 +227,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_fp8(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -180,6 +248,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_fp8_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -196,6 +270,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_fp8_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -212,6 +292,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -229,6 +315,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -246,6 +338,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_bf8(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -261,6 +359,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_bf8_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -277,6 +381,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_bf8_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -293,6 +403,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -310,6 +426,12 @@ entry:
}
define float @test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) {
+; GFX1170-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -333,3 +455,5 @@ declare float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c)
declare float @llvm.fabs.f32(float %a)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s
new file mode 100644
index 0000000000000..eb2ad1c3faa15
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1170_asm_vop3p.s
@@ -0,0 +1,122 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1170 %s | FileCheck --check-prefix=GFX1170 %s
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, v3
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1]
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x9c]
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1]
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x24,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, s0, v2, v3
+// GFX1170: v_dot4_f32_fp8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x00,0x04,0x0e,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, v1, s0, v3
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x01,0x0c,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, s0
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x02,0x18]
+
+v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3
+// GFX1170: v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0xf2,0x04,0x0e,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0xe5,0x0d,0x1c]
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0xca,0x1b]
+
+v_dot4_f32_fp8_bf8 v0, v1, v2, 1
+// GFX1170: v_dot4_f32_fp8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x06,0x1a]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, v3
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1]
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x9c]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1]
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x25,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, s0, v2, v3
+// GFX1170: v_dot4_f32_bf8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x00,0x04,0x0e,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, v1, s0, v3
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x01,0x0c,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, s0
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x02,0x18]
+
+v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3
+// GFX1170: v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0xf2,0x04,0x0e,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0xe5,0x0d,0x1c]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0xca,0x1b]
+
+v_dot4_f32_bf8_fp8 v0, v1, v2, 1
+// GFX1170: v_dot4_f32_bf8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x06,0x1a]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, v3
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1]
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x9c]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1]
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x26,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, s0, v2, v3
+// GFX1170: v_dot4_f32_fp8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x00,0x04,0x0e,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, v1, s0, v3
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x01,0x0c,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, s0
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x02,0x18]
+
+v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3
+// GFX1170: v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0xf2,0x04,0x0e,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0xe5,0x0d,0x1c]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0xca,0x1b]
+
+v_dot4_f32_fp8_fp8 v0, v1, v2, 1
+// GFX1170: v_dot4_f32_fp8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x06,0x1a]
+
+v_dot4_f32_bf8_bf8 v0, v1, v2, v3
+// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1]
+// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x9c]
+
+v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1]
+// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x27,0xcc,0x01,0x05,0x0e,0x1c]
+
+v_dot4_f32_bf8_bf8 v0, s0, v2, v3
+// GFX1170: v_dot4_f32_bf8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x00,0x04,0x0e,0x1c]
+
+v_dot4_f32_bf8_bf8 v0, v1, s0, v3
+// GFX1170: v_dot4_f32_bf8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x01,0x0c,0x1c]
+
+v_dot4_f32_bf8_b...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180516
More information about the cfe-commits
mailing list