[llvm] [AArch64] Add USDOT to the instructions handled by performAddDotCombine. (PR #171864)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 09:13:49 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/171864
>From 1df2384f5aed2114e05a30a4251353f137d922fb Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 11 Dec 2025 16:23:35 +0000
Subject: [PATCH 1/3] [AArch64] Add tests for add(usdot(zero), a). NFC
---
llvm/test/CodeGen/AArch64/aarch64-matmul.ll | 33 +++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
index e7e9ee7330613..bdb64ef699f34 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s
-; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smmla.v4i32.v16i8:
@@ -160,8 +160,37 @@ entry:
ret <4 x i32> %vusdot1.i
}
+define <2 x i32> @usdot_add_zero.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: usdot_add_zero.v2i32.v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v3.2d, #0000000000000000
+; CHECK-NEXT: usdot v3.2s, v1.8b, v2.8b
+; CHECK-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %x = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %a, <8 x i8> %b)
+ %y = add <2 x i32> %x, %r
+ ret <2 x i32> %y
+}
+
+define <4 x i32> @usdot_add_zero.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: usdot_add_zero.v4i32.v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v3.2d, #0000000000000000
+; CHECK-NEXT: usdot v3.4s, v1.16b, v2.16b
+; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %x = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %a, <16 x i8> %b)
+ %y = add <4 x i32> %x, %r
+ ret <4 x i32> %y
+}
+
declare <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
>From 9241ad0fbfaade9c9006bb46aa127a709734ed59 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 11 Dec 2025 16:30:05 +0000
Subject: [PATCH 2/3] [AArch64] Add USDOT to the instructions handled by
performAddDotCombine.
This function performs the fold
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
which applies equally to USDOT now that we have a node for it.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/test/CodeGen/AArch64/aarch64-matmul.ll | 37 +++++++++++--------
2 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 41caa817c11a4..35d40eb4e6e3f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21813,7 +21813,8 @@ static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
// Handle commutivity
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
- Dot.getOpcode() == AArch64ISD::SDOT) &&
+ Dot.getOpcode() == AArch64ISD::SDOT ||
+ Dot.getOpcode() == AArch64ISD::USDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
index bdb64ef699f34..c6776f3dd2513 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
@@ -161,12 +161,17 @@ entry:
}
define <2 x i32> @usdot_add_zero.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: usdot_add_zero.v2i32.v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: usdot v3.2s, v1.8b, v2.8b
-; CHECK-NEXT: add v0.2s, v3.2s, v0.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: usdot_add_zero.v2i32.v8i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usdot v0.2s, v1.8b, v2.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: usdot_add_zero.v2i32.v8i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-NEXT: usdot v3.2s, v1.8b, v2.8b
+; CHECK-GI-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-GI-NEXT: ret
entry:
%x = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> zeroinitializer, <8 x i8> %a, <8 x i8> %b)
%y = add <2 x i32> %x, %r
@@ -174,12 +179,17 @@ entry:
}
define <4 x i32> @usdot_add_zero.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: usdot_add_zero.v4i32.v16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: usdot v3.4s, v1.16b, v2.16b
-; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: usdot_add_zero.v4i32.v16i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usdot v0.4s, v1.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: usdot_add_zero.v4i32.v16i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-NEXT: usdot v3.4s, v1.16b, v2.16b
+; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
+; CHECK-GI-NEXT: ret
entry:
%x = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> %a, <16 x i8> %b)
%y = add <4 x i32> %x, %r
@@ -191,6 +201,3 @@ declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16
declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
>From 68a8cc92672893973c6363ecadf17923da47d518 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 11 Dec 2025 17:11:40 +0000
Subject: [PATCH 3/3] Update neon-dotreduce.ll
---
llvm/test/CodeGen/AArch64/neon-dotreduce.ll | 114 ++++++++------------
1 file changed, 44 insertions(+), 70 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index 4b0d110632959..dbbe00c89eecf 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -1375,11 +1375,9 @@ define i32 @test_usdot_v8i8_double(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i
; CHECK-SD-LABEL: test_usdot_v8i8_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
-; CHECK-SD-NEXT: usdot v5.2s, v0.8b, v1.8b
; CHECK-SD-NEXT: usdot v4.2s, v2.8b, v3.8b
-; CHECK-SD-NEXT: add v0.2s, v5.2s, v4.2s
-; CHECK-SD-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-SD-NEXT: usdot v4.2s, v0.8b, v1.8b
+; CHECK-SD-NEXT: addp v0.2s, v4.2s, v4.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -1416,11 +1414,9 @@ define i32 @test_usdot_swapped_operands_v8i8_double(<8 x i8> %a, <8 x i8> %b, <8
; CHECK-SD-LABEL: test_usdot_swapped_operands_v8i8_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
-; CHECK-SD-NEXT: usdot v5.2s, v1.8b, v0.8b
; CHECK-SD-NEXT: usdot v4.2s, v3.8b, v2.8b
-; CHECK-SD-NEXT: add v0.2s, v5.2s, v4.2s
-; CHECK-SD-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-SD-NEXT: usdot v4.2s, v1.8b, v0.8b
+; CHECK-SD-NEXT: addp v0.2s, v4.2s, v4.2s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -1457,11 +1453,9 @@ define i32 @test_usdot_v16i8_double(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <1
; CHECK-SD-LABEL: test_usdot_v16i8_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
-; CHECK-SD-NEXT: usdot v5.4s, v0.16b, v1.16b
; CHECK-SD-NEXT: usdot v4.4s, v2.16b, v3.16b
-; CHECK-SD-NEXT: add v0.4s, v5.4s, v4.4s
-; CHECK-SD-NEXT: addv s0, v0.4s
+; CHECK-SD-NEXT: usdot v4.4s, v0.16b, v1.16b
+; CHECK-SD-NEXT: addv s0, v4.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -1509,11 +1503,9 @@ define i32 @test_usdot_swapped_operands_v16i8_double(<16 x i8> %a, <16 x i8> %b,
; CHECK-SD-LABEL: test_usdot_swapped_operands_v16i8_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
-; CHECK-SD-NEXT: usdot v5.4s, v1.16b, v0.16b
; CHECK-SD-NEXT: usdot v4.4s, v3.16b, v2.16b
-; CHECK-SD-NEXT: add v0.4s, v5.4s, v4.4s
-; CHECK-SD-NEXT: addv s0, v0.4s
+; CHECK-SD-NEXT: usdot v4.4s, v1.16b, v0.16b
+; CHECK-SD-NEXT: addv s0, v4.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -4384,12 +4376,10 @@ define i32 @test_usdot_v32i8(ptr nocapture readonly %a, ptr nocapture readonly %
; CHECK-SD-LABEL: test_usdot_v32i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
-; CHECK-SD-NEXT: ldp q2, q3, [x0]
-; CHECK-SD-NEXT: ldp q4, q5, [x1]
-; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
-; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
-; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ldp q1, q3, [x0]
+; CHECK-SD-NEXT: ldp q2, q4, [x1]
+; CHECK-SD-NEXT: usdot v0.4s, v3.16b, v4.16b
+; CHECK-SD-NEXT: usdot v0.4s, v1.16b, v2.16b
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w8, s0
; CHECK-SD-NEXT: add w0, w8, w2
@@ -4438,15 +4428,11 @@ define i32 @test_usdot_v32i8_double(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <3
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v16.2d, #0000000000000000
; CHECK-SD-NEXT: movi v17.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v18.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v19.2d, #0000000000000000
-; CHECK-SD-NEXT: usdot v16.4s, v1.16b, v3.16b
-; CHECK-SD-NEXT: usdot v18.4s, v0.16b, v2.16b
-; CHECK-SD-NEXT: usdot v17.4s, v4.16b, v6.16b
-; CHECK-SD-NEXT: usdot v19.4s, v5.16b, v7.16b
-; CHECK-SD-NEXT: add v0.4s, v18.4s, v16.4s
-; CHECK-SD-NEXT: add v1.4s, v17.4s, v19.4s
-; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: usdot v17.4s, v1.16b, v3.16b
+; CHECK-SD-NEXT: usdot v16.4s, v5.16b, v7.16b
+; CHECK-SD-NEXT: usdot v17.4s, v0.16b, v2.16b
+; CHECK-SD-NEXT: usdot v16.4s, v4.16b, v6.16b
+; CHECK-SD-NEXT: add v0.4s, v17.4s, v16.4s
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
@@ -8781,20 +8767,16 @@ define i32 @test_usdot_v64i8(ptr nocapture readonly %a, ptr nocapture readonly %
; CHECK-SD-LABEL: test_usdot_v64i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v3.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v4.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
-; CHECK-SD-NEXT: ldp q1, q2, [x0, #32]
-; CHECK-SD-NEXT: ldp q6, q7, [x1, #32]
-; CHECK-SD-NEXT: ldp q16, q17, [x0]
-; CHECK-SD-NEXT: ldp q18, q19, [x1]
-; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v7.16b
-; CHECK-SD-NEXT: usdot v5.4s, v1.16b, v6.16b
-; CHECK-SD-NEXT: usdot v4.4s, v17.16b, v19.16b
-; CHECK-SD-NEXT: usdot v3.4s, v16.16b, v18.16b
-; CHECK-SD-NEXT: add v0.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: add v1.4s, v3.4s, v5.4s
-; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: ldp q2, q3, [x0, #32]
+; CHECK-SD-NEXT: ldp q4, q5, [x1, #32]
+; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
+; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
+; CHECK-SD-NEXT: ldp q2, q3, [x0]
+; CHECK-SD-NEXT: ldp q4, q5, [x1]
+; CHECK-SD-NEXT: usdot v1.4s, v3.16b, v5.16b
+; CHECK-SD-NEXT: usdot v0.4s, v2.16b, v4.16b
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w8, s0
; CHECK-SD-NEXT: add w0, w8, w2
@@ -8863,32 +8845,24 @@ entry:
define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; CHECK-SD-LABEL: test_usdot_v64i8_double:
; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v16.2d, #0000000000000000
+; CHECK-SD-NEXT: movi v17.2d, #0000000000000000
; CHECK-SD-NEXT: movi v18.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v21.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v22.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v23.2d, #0000000000000000
-; CHECK-SD-NEXT: ldp q16, q17, [sp, #64]
-; CHECK-SD-NEXT: movi v24.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v25.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v26.2d, #0000000000000000
-; CHECK-SD-NEXT: movi v27.2d, #0000000000000000
-; CHECK-SD-NEXT: ldp q19, q20, [sp, #96]
-; CHECK-SD-NEXT: usdot v18.4s, v3.16b, v7.16b
-; CHECK-SD-NEXT: ldp q3, q7, [sp, #32]
-; CHECK-SD-NEXT: usdot v21.4s, v1.16b, v5.16b
-; CHECK-SD-NEXT: ldp q1, q5, [sp]
-; CHECK-SD-NEXT: usdot v22.4s, v2.16b, v6.16b
-; CHECK-SD-NEXT: usdot v23.4s, v0.16b, v4.16b
-; CHECK-SD-NEXT: usdot v24.4s, v7.16b, v20.16b
-; CHECK-SD-NEXT: usdot v27.4s, v3.16b, v19.16b
-; CHECK-SD-NEXT: usdot v26.4s, v5.16b, v17.16b
-; CHECK-SD-NEXT: usdot v25.4s, v1.16b, v16.16b
-; CHECK-SD-NEXT: add v0.4s, v21.4s, v18.4s
-; CHECK-SD-NEXT: add v1.4s, v23.4s, v22.4s
-; CHECK-SD-NEXT: add v2.4s, v26.4s, v24.4s
-; CHECK-SD-NEXT: add v3.4s, v25.4s, v27.4s
-; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: add v1.4s, v3.4s, v2.4s
+; CHECK-SD-NEXT: movi v19.2d, #0000000000000000
+; CHECK-SD-NEXT: ldp q20, q21, [sp, #96]
+; CHECK-SD-NEXT: ldp q22, q23, [sp, #32]
+; CHECK-SD-NEXT: usdot v16.4s, v3.16b, v7.16b
+; CHECK-SD-NEXT: usdot v18.4s, v2.16b, v6.16b
+; CHECK-SD-NEXT: usdot v19.4s, v23.16b, v21.16b
+; CHECK-SD-NEXT: usdot v17.4s, v22.16b, v20.16b
+; CHECK-SD-NEXT: ldp q2, q3, [sp, #64]
+; CHECK-SD-NEXT: ldp q6, q7, [sp]
+; CHECK-SD-NEXT: usdot v16.4s, v1.16b, v5.16b
+; CHECK-SD-NEXT: usdot v18.4s, v0.16b, v4.16b
+; CHECK-SD-NEXT: usdot v19.4s, v7.16b, v3.16b
+; CHECK-SD-NEXT: usdot v17.4s, v6.16b, v2.16b
+; CHECK-SD-NEXT: add v0.4s, v18.4s, v16.4s
+; CHECK-SD-NEXT: add v1.4s, v17.4s, v19.4s
; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
More information about the llvm-commits
mailing list