[PATCH] D158468: [AMDGPU] Support sdot4 / sdot8 intrinsics on gfx11
Jeffrey Byrnes via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 22 12:17:59 PDT 2023
jrbyrnes updated this revision to Diff 552466.
jrbyrnes added a comment.
Properly handle neg modifier
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D158468/new/
https://reviews.llvm.org/D158468
Files:
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -4,6 +4,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
@@ -11,6 +12,7 @@
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX908: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11: v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0] clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
@@ -31,6 +33,7 @@
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX908: v_dot8c_i32_i4_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX11: v_dot8_i32_iu4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -3,12 +3,14 @@
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+; GFX11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,1,0] clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
@@ -28,6 +30,7 @@
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GF11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -436,6 +436,20 @@
let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;
+
+def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT8_I32_IU4 (i32 9), i32:$src0,
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
+
+def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
+ i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (V_DOT4_I32_IU8 (i32 9), i32:$src0,
+ (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
+>;
} // End SubtargetPredicate = HasDot8Insts
def : UDot2Pat<V_DOT2_U32_U16>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158468.552466.patch
Type: text/x-patch
Size: 4424 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230822/29555b46/attachment.bin>
More information about the llvm-commits
mailing list