[PATCH] D158468: [AMDGPU] Add sdot4 / sdot8 intrinsics for gfx11
Jeffrey Byrnes via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 21 17:21:06 PDT 2023
jrbyrnes created this revision.
jrbyrnes added a reviewer: arsenm.
Herald added subscribers: foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
jrbyrnes requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
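On gfx11, select the llvm.amdgcn.sdot4 and llvm.amdgcn.sdot8 intrinsics to v_dot4_i32_iu8 and v_dot8_i32_iu4. This provides a uniform way to lower these intrinsics into the relevant instructions across all generations.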
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D158468
Files:
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -4,6 +4,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
@@ -11,6 +12,7 @@
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX908: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11: v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0] clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
@@ -31,6 +33,7 @@
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX908: v_dot8c_i32_i4_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX11: v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -3,12 +3,14 @@
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0] clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
@@ -28,6 +30,7 @@
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -436,6 +436,20 @@
let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;
+// Signed dot products: the IU forms treat a source as signed only when its neg_lo bit is set, so src0/src1 use modifier 9 (OP_SEL_1 | NEG) rather than the default 8 (OP_SEL_1).
+def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT8_I32_IU4 (i32 9), i32:$src0, // src0: signed (neg_lo)
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp) // src1: signed; src2 (accumulator): default modifiers
+>;
+
+def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT4_I32_IU8 (i32 9), i32:$src0, // src0: signed (neg_lo)
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp) // src1: signed; src2 (accumulator): default modifiers
+>;
} // End SubtargetPredicate = HasDot8Insts
def : UDot2Pat<V_DOT2_U32_U16>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158468.552172.patch
Type: text/x-patch
Size: 4359 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230822/974eb541/attachment.bin>
More information about the llvm-commits
mailing list