[PATCH] D97279: [AArch64] Extend vecreduce -> udot handling to v8i8
Dave Green via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 23 06:11:24 PST 2021
dmgreen created this revision.
dmgreen added reviewers: mivnay, SjoerdMeijer, fhahn.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.
dmgreen requested review of this revision.
Herald added a project: LLVM.
https://reviews.llvm.org/D88577 added lowering of v16i8 vecreduce to udot/sdot. This patch extends that lowering to v8i8 as well, generalizing the pattern to handle the extra type (a v8i8 input uses a v2i32 zero accumulator instead of v4i32).
https://reviews.llvm.org/D97279
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/neon-dotreduce.ll
Index: llvm/test/CodeGen/AArch64/neon-dotreduce.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -31,10 +31,10 @@
; CHECK-LABEL: test_udot_v8i8_nomla:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: movi v2.8b, #1
+; CHECK-NEXT: udot v1.2s, v2.8b, v0.8b
+; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
@@ -72,10 +72,10 @@
; CHECK-LABEL: test_sdot_v8i8_nomla:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-NEXT: saddw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: movi v2.8b, #1
+; CHECK-NEXT: sdot v1.2s, v2.8b, v0.8b
+; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
@@ -207,14 +207,11 @@
define i32 @test_udot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_udot_v8i8_double_nomla:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v2.8b, #0
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
-; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: movi v3.8b, #1
+; CHECK-NEXT: udot v1.2s, v3.8b, v2.8b
+; CHECK-NEXT: udot v1.2s, v3.8b, v0.8b
+; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
@@ -322,14 +319,11 @@
define i32 @test_sdot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_sdot_v8i8_double_nomla:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v2.8b, #0
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-NEXT: saddw2 v0.4s, v2.4s, v0.8h
-; CHECK-NEXT: saddw2 v1.4s, v3.4s, v1.8h
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: movi v3.8b, #1
+; CHECK-NEXT: sdot v1.2s, v3.8b, v2.8b
+; CHECK-NEXT: sdot v1.2s, v3.8b, v0.8b
+; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11754,12 +11754,13 @@
return SDValue();
EVT Op0VT = Op0.getOperand(0).getValueType();
- if (Op0VT != MVT::v16i8)
+ if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
SDValue Ones = DAG.getConstant(1, DL, Op0VT);
- SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
+ SDValue Zeros =
+ DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97279.325758.patch
Type: text/x-patch
Size: 3652 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210223/59e2613c/attachment.bin>
More information about the llvm-commits mailing list