[PATCH] D97188: [AArch64] Add patterns for add(udot(0, x, y), z) -> udot(z, x, y).

Dave Green via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 22 05:05:41 PST 2021


dmgreen created this revision.
dmgreen added reviewers: SjoerdMeijer, NickGuy, efriedma, fhahn.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.
dmgreen requested review of this revision.
Herald added a project: LLVM.

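When the accumulator operand of a udot or sdot is zero, a following add can be folded into the instruction: the add's other operand takes the place of the zero accumulator, i.e. add(udot(0, x, y), z) -> udot(z, x, y).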


https://reviews.llvm.org/D97188

Files:
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/neon-dot-product.ll


Index: llvm/test/CodeGen/AArch64/neon-dot-product.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neon-dot-product.ll
+++ llvm/test/CodeGen/AArch64/neon-dot-product.ll
@@ -42,6 +42,43 @@
   ret <4 x i32> %vdot1.i
 }
 
+
+define <2 x i32> @test_vdot_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+entry:
+; CHECK-LABEL: test_vdot_u32_zero:
+; CHECK: udot v0.2s, v1.8b, v2.8b
+  %vdot1.i = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2
+  %ret = add <2 x i32> %vdot1.i, %a
+  ret <2 x i32> %ret
+}
+
+define <4 x i32> @test_vdotq_u32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+entry:
+; CHECK-LABEL: test_vdotq_u32_zero:
+; CHECK: udot v0.4s, v1.16b, v2.16b
+  %vdot1.i = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2
+  %ret = add <4 x i32> %vdot1.i, %a
+  ret <4 x i32> %ret
+}
+
+define <2 x i32> @test_vdot_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+entry:
+; CHECK-LABEL: test_vdot_s32_zero:
+; CHECK: sdot v0.2s, v1.8b, v2.8b
+  %vdot1.i = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %b, <8 x i8> %c) #2
+  %ret = add <2 x i32> %vdot1.i, %a
+  ret <2 x i32> %ret
+}
+
+define <4 x i32> @test_vdotq_s32_zero(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+entry:
+; CHECK-LABEL: test_vdotq_s32_zero:
+; CHECK: sdot v0.4s, v1.16b, v2.16b
+  %vdot1.i = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %b, <16 x i8> %c) #2
+  %ret = add <4 x i32> %vdot1.i, %a
+  ret <4 x i32> %ret
+}
+
 define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 entry:
 ; CHECK-LABEL: test_vdot_lane_u32:
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5622,6 +5622,11 @@
                                          v2i32, v8i8, OpNode>;
   def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128,
                                          v4i32, v16i8, OpNode>;
+
+  def : Pat<(add (v2i32 V64:$Rd), (OpNode (v2i32 immAllZerosV), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+            (!cast<Instruction>(NAME # "v8i8") $Rd, $Rn, $Rm)>;
+  def : Pat<(add (v4i32 V128:$Rd), (OpNode (v4i32 immAllZerosV), (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
+            (!cast<Instruction>(NAME # "v16i8") $Rd, $Rn, $Rm)>;
 }
 
 // ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97188.325419.patch
Type: text/x-patch
Size: 2677 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210222/a8d7bc97/attachment.bin>


More information about the llvm-commits mailing list