[PATCH] D158468: [AMDGPU] Add sdot4 / sdot8 intrinsics for gfx11

Mon Aug 21 17:21:06 PDT 2023

jrbyrnes created this revision.
jrbyrnes added a reviewer: arsenm.
Herald added subscribers: foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
jrbyrnes requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

This provides a uniform way to lower into the relevant instructions across all generations.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158468

Files:
  llvm/lib/Target/AMDGPU/VOP3PInstructions.td
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -4,6 +4,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
 declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
@@ -11,6 +12,7 @@
 ; GFX906:  v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX908:  v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:   v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11:   v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
@@ -31,6 +33,7 @@
 ; GFX906:  v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX908:  v_dot8c_i32_i4_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX10:   v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX11:   v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -3,12 +3,14 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
 declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
 ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11:  v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
@@ -28,6 +30,7 @@
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
 ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX10:  v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GF11:   v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -436,6 +436,20 @@
 let SubtargetPredicate = HasDot8Insts  in {
 defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
 defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;
+
+def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
+                                 i32:$src1,
+                                 i32:$src2, (i1 timm:$clamp)),
+               (V_DOT8_I32_IU4  (i32 8), i32:$src0,
+                                (i32 8), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
+
+def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
+                                 i32:$src1,
+                                 i32:$src2, (i1 timm:$clamp)),
+               (V_DOT4_I32_IU8  (i32 8), i32:$src0,
+                                (i32 8), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
 } // End SubtargetPredicate = HasDot8Insts
 
 def : UDot2Pat<V_DOT2_U32_U16>;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158468.552172.patch
Type: text/x-patch
Size: 4359 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230822/974eb541/attachment.bin>