[llvm] [AArch64][GlobalISel] Support udot lowering for vecreduce add (PR #70784)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 09:51:37 PDT 2023
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/70784
>From 1a9e99720868f8fa23c377754c586e0ffb90e0ab Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Wed, 1 Nov 2023 11:40:08 +0000
Subject: [PATCH 1/2] [AArch64][GlobalISel] Pre-Commit for UDOT lowering for G_VECREDUCE_ADD
---
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 6539 ++++++++++----------
1 file changed, 3313 insertions(+), 3226 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index a88c930d09e9b17..2a512aaf9b2cc8d 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -1,9 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 -mattr=+dotprod %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE,CHECK-SD-BASE
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-DOT
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-BASE,CHECK-GI-BASE
-; CHECK-GI: warning: Instruction selection used fallback path for full
define i32 @addv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: addv_v2i32:
@@ -50,25 +49,25 @@ entry:
}
define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
-; CHECK-BASE-LABEL: add_v4i32_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i32_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i32_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i32_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i32_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i32_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -76,25 +75,25 @@ entry:
}
define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
-; CHECK-BASE-LABEL: add_v4i32_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i32_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i32_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i32_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i32_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i32_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -128,25 +127,25 @@ entry:
}
define i32 @add_v8i16_v8i32_zext(<8 x i16> %x) {
-; CHECK-BASE-LABEL: add_v8i16_v8i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlv s0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -154,25 +153,25 @@ entry:
}
define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
-; CHECK-BASE-LABEL: add_v8i16_v8i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlv s0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlv s0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -206,64 +205,64 @@ entry:
}
define zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) {
-; CHECK-BASE-LABEL: add_v8i16_v8i16:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i16:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: uxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i16:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i16:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i16:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: uxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
ret i16 %z
}
define i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
-; CHECK-BASE-LABEL: add_v8i16_v8i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -271,40 +270,40 @@ entry:
}
define i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
-; CHECK-BASE-LABEL: add_v8i16_v8i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -312,28 +311,28 @@ entry:
}
define i64 @add_v4i16_v4i64_zext(<4 x i16> %x) {
-; CHECK-BASE-LABEL: add_v4i16_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i16_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i16_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i16_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i16_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i16_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -341,28 +340,28 @@ entry:
}
define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
-; CHECK-BASE-LABEL: add_v4i16_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i16_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i16_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i16_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i16_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i16_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -370,32 +369,32 @@ entry:
}
define i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
-; CHECK-BASE-LABEL: add_v2i16_v2i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v2i16_v2i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v2i16_v2i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v2i16_v2i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v2i16_v2i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v2i16_v2i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x0000000000ffff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -418,38 +417,38 @@ entry:
}
define i32 @add_v16i8_v16i32_zext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
-; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.16b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
-; CHECK-DOT-NEXT: addv s0, v2.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
-; CHECK-GI-NEXT: uaddw2 v0.4s, v3.4s, v0.8h
-; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.16b, #1
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v3.4s, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i32>
%z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -457,38 +456,38 @@ entry:
}
define i32 @add_v16i8_v16i32_sext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h
-; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.16b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
-; CHECK-DOT-NEXT: addv s0, v2.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v1.4s, v2.4s, v1.8h
-; CHECK-GI-NEXT: saddw2 v0.4s, v3.4s, v0.8h
-; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.16b, #1
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.4s, v2.4s, v1.8h
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v3.4s, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i32>
%z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -496,30 +495,30 @@ entry:
}
define i32 @add_v8i8_v8i32_zext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: uaddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v2.8b, #1
+; CHECK-SD-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -527,30 +526,30 @@ entry:
}
define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v2.8b, #1
+; CHECK-SD-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -558,30 +557,30 @@ entry:
}
define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
-; CHECK-BASE-LABEL: add_v4i8_v4i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -604,28 +603,28 @@ entry:
}
define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i16_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlp v0.8h, v0.16b
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i16_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlp v0.8h, v0.16b
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i16_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: uxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i16_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlp v0.8h, v0.16b
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i16_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlp v0.8h, v0.16b
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i16_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: uxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -633,28 +632,28 @@ entry:
}
define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i16_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlp v0.8h, v0.16b
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: smov w0, v0.h[0]
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i16_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlp v0.8h, v0.16b
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: smov w0, v0.h[0]
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i16_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: saddw2 v0.8h, v1.8h, v0.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i16_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlp v0.8h, v0.16b
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i16_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlp v0.8h, v0.16b
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i16_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -662,27 +661,27 @@ entry:
}
define zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i16_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i16_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i16_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: uxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i16_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i16_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i16_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: uxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -690,27 +689,27 @@ entry:
}
define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i16_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: smov w0, v0.h[0]
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i16_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: smov w0, v0.h[0]
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i16_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i16_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i16_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i16_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -718,90 +717,90 @@ entry:
}
define zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i8:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: addv b0, v0.16b
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i8:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: addv b0, v0.16b
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv b0, v0.16b
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: uxtb w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i8:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: addv b0, v0.16b
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: addv b0, v0.16b
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i8:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv b0, v0.16b
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: uxtb w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
ret i8 %z
}
define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v2.4s, v1.8h, #0
-; CHECK-BASE-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
-; CHECK-BASE-NEXT: uaddl v2.2d, v3.2s, v2.2s
-; CHECK-BASE-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll2 v2.4s, v1.8h, #0
-; CHECK-DOT-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
-; CHECK-DOT-NEXT: uaddl v2.2d, v3.2s, v2.2s
-; CHECK-DOT-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
-; CHECK-GI-NEXT: add v1.2d, v2.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-BASE-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-DOT-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v7.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -809,66 +808,66 @@ entry:
}
define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
-; CHECK-BASE-LABEL: add_v16i8_v16i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v2.4s, v1.8h, #0
-; CHECK-BASE-NEXT: sshll2 v3.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v4.2d, v3.4s, v2.4s
-; CHECK-BASE-NEXT: saddl v2.2d, v3.2s, v2.2s
-; CHECK-BASE-NEXT: saddl2 v5.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll2 v2.4s, v1.8h, #0
-; CHECK-DOT-NEXT: sshll2 v3.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v4.2d, v3.4s, v2.4s
-; CHECK-DOT-NEXT: saddl v2.2d, v3.2s, v2.2s
-; CHECK-DOT-NEXT: saddl2 v5.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v5.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v7.2d, v0.4s
-; CHECK-GI-NEXT: add v1.2d, v2.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-BASE-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-BASE-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-DOT-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-DOT-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v5.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v7.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -876,43 +875,43 @@ entry:
}
define i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -920,43 +919,43 @@ entry:
}
define i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
-; CHECK-BASE-LABEL: add_v8i8_v8i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -964,34 +963,34 @@ entry:
}
define i64 @add_v4i8_v4i64_zext(<4 x i8> %x) {
-; CHECK-BASE-LABEL: add_v4i8_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: and v2.16b, v2.16b, v1.16b
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -999,44 +998,44 @@ entry:
}
define i64 @add_v4i8_v4i64_sext(<4 x i8> %x) {
-; CHECK-BASE-LABEL: add_v4i8_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-BASE-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-BASE-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-DOT-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-DOT-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.2d, v0.4s, #0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-GI-NEXT: addp d0, v1.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: addp d0, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1044,32 +1043,32 @@ entry:
}
define i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
-; CHECK-BASE-LABEL: add_v2i8_v2i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v2i8_v2i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v2i8_v2i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x0, d0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v2i8_v2i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v2i8_v2i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v2i8_v2i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x0, d0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -1116,28 +1115,28 @@ entry:
}
define i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i32_v4i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i32_v4i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i32_v4i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1146,28 +1145,28 @@ entry:
}
define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i32_v4i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i32_v4i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i32_v4i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1206,28 +1205,28 @@ entry:
}
define i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v8i16_v8i32_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i32_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlv s0, v0.8h
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i32_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1236,28 +1235,28 @@ entry:
}
define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v8i16_v8i32_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i32_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlv s0, v0.8h
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i32_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlv s0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1296,29 +1295,29 @@ entry:
}
define zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) {
-; CHECK-BASE-LABEL: add_v8i16_v8i16_acc:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: and w0, w8, #0xffff
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i16_acc:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: and w0, w8, #0xffff
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i16_acc:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i16_acc:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i16_acc:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i16_acc:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
%r = add i16 %z, %a
@@ -1326,43 +1325,43 @@ entry:
}
define i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v8i16_v8i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -1371,43 +1370,43 @@ entry:
}
define i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v8i16_v8i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i16_v8i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i16_v8i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -1416,31 +1415,31 @@ entry:
}
define i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i16_v4i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i16_v4i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i16_v4i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1449,31 +1448,31 @@ entry:
}
define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i16_v4i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i16_v4i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i16_v4i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v1.2d, v0.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1482,35 +1481,35 @@ entry:
}
define i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v2i16_v2i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v2i16_v2i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v2i16_v2i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x0000000000ffff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -1536,41 +1535,41 @@ entry:
}
define i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
-; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.16b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
-; CHECK-DOT-NEXT: addv s0, v2.4s
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i32_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
-; CHECK-GI-NEXT: uaddw2 v0.4s, v3.4s, v0.8h
-; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.16b, #1
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v3.4s, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i32>
%z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -1579,41 +1578,41 @@ entry:
}
define i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h
-; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.16b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
-; CHECK-DOT-NEXT: addv s0, v2.4s
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i32_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v1.4s, v2.4s, v1.8h
-; CHECK-GI-NEXT: saddw2 v0.4s, v3.4s, v0.8h
-; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-SD-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.16b, #1
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.4s, v2.4s, v1.8h
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v3.4s, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i32>
%z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -1622,33 +1621,33 @@ entry:
}
define i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: uaddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i32_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v2.8b, #1
+; CHECK-SD-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1657,33 +1656,33 @@ entry:
}
define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlv s0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i32_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v1.4s, v0.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v2.8b, #1
+; CHECK-SD-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1692,33 +1691,33 @@ entry:
}
define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
-; CHECK-BASE-LABEL: add_v4i8_v4i32_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w0, w8, w0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i32_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w0, w8, w0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i32_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w0, w8, w0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w0, w8, w0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w0, w8, w0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -1744,31 +1743,31 @@ entry:
}
define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i16_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlv h0, v0.16b
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: and w0, w8, #0xffff
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i16_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlv h0, v0.16b
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: and w0, w8, #0xffff
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i16_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlv h0, v0.16b
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlv h0, v0.16b
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1777,31 +1776,31 @@ entry:
}
define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i16_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlv h0, v0.16b
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: sxth w0, w8
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i16_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlv h0, v0.16b
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: sxth w0, w8
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i16_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: saddw2 v0.8h, v1.8h, v0.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxth
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlv h0, v0.16b
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: sxth w0, w8
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlv h0, v0.16b
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: sxth w0, w8
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1810,32 +1809,32 @@ entry:
}
define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i16_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: and w0, w8, #0xffff
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i16_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: and w0, w8, #0xffff
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i16_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -1844,32 +1843,32 @@ entry:
}
define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i16_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: sxth w0, w8
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i16_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: sxth w0, w8
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i16_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxth
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: sxth w0, w8
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: sxth w0, w8
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -1878,29 +1877,29 @@ entry:
}
define zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i8_acc:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: addv b0, v0.16b
-; CHECK-BASE-NEXT: fmov w8, s0
-; CHECK-BASE-NEXT: add w8, w8, w0
-; CHECK-BASE-NEXT: and w0, w8, #0xff
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i8_acc:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: addv b0, v0.16b
-; CHECK-DOT-NEXT: fmov w8, s0
-; CHECK-DOT-NEXT: add w8, w8, w0
-; CHECK-DOT-NEXT: and w0, w8, #0xff
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i8_acc:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv b0, v0.16b
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: add w8, w0, w8, uxtb
-; CHECK-GI-NEXT: and w0, w8, #0xff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i8_acc:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: addv b0, v0.16b
+; CHECK-SD-BASE-NEXT: fmov w8, s0
+; CHECK-SD-BASE-NEXT: add w8, w8, w0
+; CHECK-SD-BASE-NEXT: and w0, w8, #0xff
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i8_acc:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: addv b0, v0.16b
+; CHECK-SD-DOT-NEXT: fmov w8, s0
+; CHECK-SD-DOT-NEXT: add w8, w8, w0
+; CHECK-SD-DOT-NEXT: and w0, w8, #0xff
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i8_acc:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv b0, v0.16b
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxtb
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xff
+; CHECK-GI-BASE-NEXT: ret
entry:
%z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
%r = add i8 %z, %a
@@ -1908,69 +1907,69 @@ entry:
}
define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v2.4s, v1.8h, #0
-; CHECK-BASE-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
-; CHECK-BASE-NEXT: uaddl v2.2d, v3.2s, v2.2s
-; CHECK-BASE-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll2 v2.4s, v1.8h, #0
-; CHECK-DOT-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
-; CHECK-DOT-NEXT: uaddl v2.2d, v3.2s, v2.2s
-; CHECK-DOT-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
-; CHECK-GI-NEXT: add v1.2d, v2.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-BASE-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-DOT-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v7.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -1979,69 +1978,69 @@ entry:
}
define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v16i8_v16i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v2.4s, v1.8h, #0
-; CHECK-BASE-NEXT: sshll2 v3.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v4.2d, v3.4s, v2.4s
-; CHECK-BASE-NEXT: saddl v2.2d, v3.2s, v2.2s
-; CHECK-BASE-NEXT: saddl2 v5.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v16i8_v16i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll2 v2.4s, v1.8h, #0
-; CHECK-DOT-NEXT: sshll2 v3.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v4.2d, v3.4s, v2.4s
-; CHECK-DOT-NEXT: saddl v2.2d, v3.2s, v2.2s
-; CHECK-DOT-NEXT: saddl2 v5.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v1.2d, v5.2d, v4.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v16i8_v16i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v5.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v7.2d, v0.4s
-; CHECK-GI-NEXT: add v1.2d, v2.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-BASE-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-BASE-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v2.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-SD-DOT-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-SD-DOT-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v1.2d, v5.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v5.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v7.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -2050,46 +2049,46 @@ entry:
}
define i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2098,46 +2097,46 @@ entry:
}
define i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v8i8_v8i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v8i8_v8i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v8i8_v8i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: saddw2 v1.2d, v2.2d, v1.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v3.2d, v0.4s
-; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2146,37 +2145,37 @@ entry:
}
define i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i8_v4i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlv d0, v0.4s
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlv d0, v0.4s
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlv d0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: and v2.16b, v2.16b, v1.16b
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2185,47 +2184,47 @@ entry:
}
define i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v4i8_v4i64_acc_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-BASE-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-BASE-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v4i8_v4i64_acc_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-DOT-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-DOT-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v4i8_v4i64_acc_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.2d, v0.4s, #0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: ssra v1.2d, v0.2d, #56
-; CHECK-GI-NEXT: addp d0, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: addp d0, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2234,35 +2233,35 @@ entry:
}
define i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
-; CHECK-BASE-LABEL: add_v2i8_v2i64_acc_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x8, d0
-; CHECK-BASE-NEXT: add x0, x8, x0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_v2i8_v2i64_acc_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x8, d0
-; CHECK-DOT-NEXT: add x0, x8, x0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_v2i8_v2i64_acc_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x0, x8, x0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x8, d0
+; CHECK-SD-BASE-NEXT: add x0, x8, x0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x8, d0
+; CHECK-SD-DOT-NEXT: add x0, x8, x0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: add x0, x8, x0
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2301,28 +2300,28 @@ entry:
}
define i32 @add_pair_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i32_v4i32:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i32_v4i32:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i32_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i32_v4i32:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i32_v4i32:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i32_v4i32:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
%z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
@@ -2331,34 +2330,34 @@ entry:
}
define i64 @add_pair_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i32_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-BASE-NEXT: uadalp v1.2d, v0.4s
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i32_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-DOT-NEXT: uadalp v1.2d, v0.4s
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i32_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v1.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-BASE-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-DOT-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2369,34 +2368,34 @@ entry:
}
define i64 @add_pair_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i32_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlp v1.2d, v1.4s
-; CHECK-BASE-NEXT: sadalp v1.2d, v0.4s
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i32_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlp v1.2d, v1.4s
-; CHECK-DOT-NEXT: sadalp v1.2d, v0.4s
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i32_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v1.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v2.2d, v0.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v3.2d, v1.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlp v1.2d, v1.4s
+; CHECK-SD-BASE-NEXT: sadalp v1.2d, v0.4s
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlp v1.2d, v1.4s
+; CHECK-SD-DOT-NEXT: sadalp v1.2d, v0.4s
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2407,30 +2406,30 @@ entry:
}
define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i32_v2i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i32_v2i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i32_v2i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i32> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2441,30 +2440,30 @@ entry:
}
define i64 @add_pair_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i32_v2i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i32_v2i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i32_v2i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <2 x i32> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2475,34 +2474,34 @@ entry:
}
define i32 @add_pair_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v8i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: uadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v1.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i16_v8i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-DOT-NEXT: uadalp v1.4s, v0.8h
-; CHECK-DOT-NEXT: addv s0, v1.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v8i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
-; CHECK-GI-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: addv s0, v1.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-DOT-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-DOT-NEXT: addv s0, v1.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -2513,34 +2512,34 @@ entry:
}
define i32 @add_pair_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v8i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: sadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v1.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i16_v8i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlp v1.4s, v1.8h
-; CHECK-DOT-NEXT: sadalp v1.4s, v0.8h
-; CHECK-DOT-NEXT: addv s0, v1.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v8i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v2.4s, v0.8h
-; CHECK-GI-NEXT: saddw2 v1.4s, v3.4s, v1.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: sadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: addv s0, v1.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlp v1.4s, v1.8h
+; CHECK-SD-DOT-NEXT: sadalp v1.4s, v0.8h
+; CHECK-SD-DOT-NEXT: addv s0, v1.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-BASE-NEXT: saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -2551,30 +2550,30 @@ entry:
}
define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i16_v4i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i16_v4i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i16_v4i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -2585,30 +2584,30 @@ entry:
}
define i32 @add_pair_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i16_v4i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i16_v4i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddl v0.4s, v0.4h, v1.4h
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i16_v4i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -2618,30 +2617,174 @@ entry:
ret i32 %z
}
-define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v8i16:
+define i32 @test_udot_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-BASE-LABEL: test_udot_v8i8:
; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-BASE-NEXT: addv h0, v0.8h
+; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT: umull v2.4s, v1.4h, v0.4h
+; CHECK-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h
+; CHECK-BASE-NEXT: addv s0, v2.4s
+; CHECK-BASE-NEXT: fmov w0, s0
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: test_udot_v8i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v2.2s, v1.8b, v0.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+entry:
+ %0 = zext <8 x i8> %a to <8 x i32>
+ %1 = zext <8 x i8> %b to <8 x i32>
+ %2 = mul nuw nsw <8 x i32> %1, %0
+ %3 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %2)
+ ret i32 %3
+}
+
+define i32 @test_udot_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SD-BASE-LABEL: test_udot_v16i8:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: umull v4.4s, v3.4h, v2.4h
+; CHECK-SD-BASE-NEXT: umull2 v2.4s, v3.8h, v2.8h
+; CHECK-SD-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h
+; CHECK-SD-BASE-NEXT: umlal v4.4s, v1.4h, v0.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v4.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: test_udot_v16i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v2.4s, v1.16b, v0.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: test_udot_v16i8:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: umull v4.4s, v3.4h, v2.4h
+; CHECK-GI-BASE-NEXT: umull v5.4s, v1.4h, v0.4h
+; CHECK-GI-BASE-NEXT: umlal2 v4.4s, v3.8h, v2.8h
+; CHECK-GI-BASE-NEXT: umlal2 v5.4s, v1.8h, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v5.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
+entry:
+ %0 = zext <16 x i8> %a to <16 x i32>
+ %1 = zext <16 x i8> %b to <16 x i32>
+ %2 = mul nuw nsw <16 x i32> %1, %0
+ %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2)
+ ret i32 %3
+}
+
+define i32 @test_sdot_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-BASE-LABEL: test_sdot_v8i8:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT: smull v2.4s, v1.4h, v0.4h
+; CHECK-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h
+; CHECK-BASE-NEXT: addv s0, v2.4s
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
-; CHECK-DOT-LABEL: add_pair_v8i16_v8i16:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: addv h1, v1.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-DOT-LABEL: test_sdot_v8i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: sdot v2.2s, v1.8b, v0.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+entry:
+ %0 = sext <8 x i8> %a to <8 x i32>
+ %1 = sext <8 x i8> %b to <8 x i32>
+ %2 = mul nuw nsw <8 x i32> %1, %0
+ %3 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %2)
+ ret i32 %3
+}
+
+define i32 @test_sdot_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SD-BASE-LABEL: test_sdot_v16i8:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: smull v4.4s, v3.4h, v2.4h
+; CHECK-SD-BASE-NEXT: smull2 v2.4s, v3.8h, v2.8h
+; CHECK-SD-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h
+; CHECK-SD-BASE-NEXT: smlal v4.4s, v1.4h, v0.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v4.4s, v2.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: test_sdot_v16i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: sdot v2.4s, v1.16b, v0.16b
+; CHECK-SD-DOT-NEXT: addv s0, v2.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: test_sdot_v16i8:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: smull v4.4s, v3.4h, v2.4h
+; CHECK-GI-BASE-NEXT: smull v5.4s, v1.4h, v0.4h
+; CHECK-GI-BASE-NEXT: smlal2 v4.4s, v3.8h, v2.8h
+; CHECK-GI-BASE-NEXT: smlal2 v5.4s, v1.8h, v0.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v5.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
+entry:
+ %0 = sext <16 x i8> %a to <16 x i32>
+ %1 = sext <16 x i8> %b to <16 x i32>
+ %2 = mul nuw nsw <16 x i32> %1, %0
+ %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2)
+ ret i32 %3
+}
+
+define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v8i16:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v8i16:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v8i16:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: addv h1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
%z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %y)
@@ -2650,62 +2793,62 @@ entry:
}
define i64 @add_pair_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v8i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll2 v3.4s, v1.8h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i16_v8i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll2 v3.4s, v1.8h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v8i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v1.2s, #0
-; CHECK-GI-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
-; CHECK-GI-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v5.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v7.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2716,62 +2859,62 @@ entry:
}
define i64 @add_pair_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v8i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: sshll2 v3.4s, v1.8h, #0
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: saddl2 v4.2d, v0.4s, v2.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-BASE-NEXT: saddl2 v2.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i16_v8i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: sshll2 v3.4s, v1.8h, #0
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: saddl2 v4.2d, v0.4s, v2.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-DOT-NEXT: saddl2 v2.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v8i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v1.2s, #0
-; CHECK-GI-NEXT: saddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v5.2d, v0.4s
-; CHECK-GI-NEXT: saddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v7.2d, v1.4s
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v5.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v7.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-BASE-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2782,40 +2925,40 @@ entry:
}
define i64 @add_pair_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i16_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-BASE-NEXT: uadalp v1.2d, v0.4s
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i16_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-DOT-NEXT: uadalp v1.2d, v0.4s
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i16_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v1.2s, #0
-; CHECK-GI-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-BASE-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-DOT-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2826,40 +2969,40 @@ entry:
}
define i64 @add_pair_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i16_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: saddlp v1.2d, v1.4s
-; CHECK-BASE-NEXT: sadalp v1.2d, v0.4s
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i16_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: saddlp v1.2d, v1.4s
-; CHECK-DOT-NEXT: sadalp v1.2d, v0.4s
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i16_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v2.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v1.2s, #0
-; CHECK-GI-NEXT: saddw2 v0.2d, v2.2d, v0.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v3.2d, v1.4s
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: saddlp v1.2d, v1.4s
+; CHECK-SD-BASE-NEXT: sadalp v1.2d, v0.4s
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: saddlp v1.2d, v1.4s
+; CHECK-SD-DOT-NEXT: sadalp v1.2d, v0.4s
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v2.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v3.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2870,39 +3013,39 @@ entry:
}
define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i16_v2i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-BASE-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i16_v2i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-DOT-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i16_v2i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2d, #0x0000000000ffff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v2.8b
+; CHECK-SD-BASE-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v2.2d, #0x0000000000ffff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2913,44 +3056,44 @@ entry:
}
define i64 @add_pair_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i16_v2i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-BASE-NEXT: shl v0.2d, v0.2d, #48
-; CHECK-BASE-NEXT: shl v1.2d, v1.2d, #48
-; CHECK-BASE-NEXT: sshr v0.2d, v0.2d, #48
-; CHECK-BASE-NEXT: ssra v0.2d, v1.2d, #48
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i16_v2i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-DOT-NEXT: shl v0.2d, v0.2d, #48
-; CHECK-DOT-NEXT: shl v1.2d, v1.2d, #48
-; CHECK-DOT-NEXT: sshr v0.2d, v0.2d, #48
-; CHECK-DOT-NEXT: ssra v0.2d, v1.2d, #48
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i16_v2i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #48
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #48
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #48
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #48
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-BASE-NEXT: shl v0.2d, v0.2d, #48
+; CHECK-SD-BASE-NEXT: shl v1.2d, v1.2d, #48
+; CHECK-SD-BASE-NEXT: sshr v0.2d, v0.2d, #48
+; CHECK-SD-BASE-NEXT: ssra v0.2d, v1.2d, #48
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-DOT-NEXT: shl v0.2d, v0.2d, #48
+; CHECK-SD-DOT-NEXT: shl v1.2d, v1.2d, #48
+; CHECK-SD-DOT-NEXT: sshr v0.2d, v0.2d, #48
+; CHECK-SD-DOT-NEXT: ssra v0.2d, v1.2d, #48
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: shl v0.2d, v0.2d, #48
+; CHECK-GI-BASE-NEXT: shl v1.2d, v1.2d, #48
+; CHECK-GI-BASE-NEXT: sshr v0.2d, v0.2d, #48
+; CHECK-GI-BASE-NEXT: sshr v1.2d, v1.2d, #48
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <2 x i16> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2961,55 +3104,55 @@ entry:
}
define i32 @add_pair_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v2.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v3.8h, v1.16b, #0
-; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: uaddl2 v4.4s, v0.8h, v2.8h
-; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v2.4h
-; CHECK-BASE-NEXT: uaddl2 v2.4s, v1.8h, v3.8h
-; CHECK-BASE-NEXT: uaddl v1.4s, v1.4h, v3.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-BASE-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.16b, #1
-; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v3.4s, v1.16b, v2.16b
-; CHECK-DOT-NEXT: udot v3.4s, v0.16b, v2.16b
-; CHECK-DOT-NEXT: addv s0, v3.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: ushll v7.4s, v1.4h, #0
-; CHECK-GI-NEXT: uaddw2 v2.4s, v4.4s, v2.8h
-; CHECK-GI-NEXT: uaddw2 v0.4s, v5.4s, v0.8h
-; CHECK-GI-NEXT: uaddw2 v3.4s, v6.4s, v3.8h
-; CHECK-GI-NEXT: uaddw2 v1.4s, v7.4s, v1.8h
-; CHECK-GI-NEXT: add v0.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: add v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v4.4s, v0.8h, v2.8h
+; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v2.4h
+; CHECK-SD-BASE-NEXT: uaddl2 v2.4s, v1.8h, v3.8h
+; CHECK-SD-BASE-NEXT: uaddl v1.4s, v1.4h, v3.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-SD-BASE-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.16b, #1
+; CHECK-SD-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v3.4s, v1.16b, v2.16b
+; CHECK-SD-DOT-NEXT: udot v3.4s, v0.16b, v2.16b
+; CHECK-SD-DOT-NEXT: addv s0, v3.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v6.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v7.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v2.4s, v4.4s, v2.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v5.4s, v0.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v3.4s, v6.4s, v3.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v7.4s, v1.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -3020,55 +3163,55 @@ entry:
}
define i32 @add_pair_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v2.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v3.8h, v1.16b, #0
-; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: saddl2 v4.4s, v0.8h, v2.8h
-; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v2.4h
-; CHECK-BASE-NEXT: saddl2 v2.4s, v1.8h, v3.8h
-; CHECK-BASE-NEXT: saddl v1.4s, v1.4h, v3.4h
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-BASE-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.16b, #1
-; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v3.4s, v1.16b, v2.16b
-; CHECK-DOT-NEXT: sdot v3.4s, v0.16b, v2.16b
-; CHECK-DOT-NEXT: addv s0, v3.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: sshll v7.4s, v1.4h, #0
-; CHECK-GI-NEXT: saddw2 v2.4s, v4.4s, v2.8h
-; CHECK-GI-NEXT: saddw2 v0.4s, v5.4s, v0.8h
-; CHECK-GI-NEXT: saddw2 v3.4s, v6.4s, v3.8h
-; CHECK-GI-NEXT: saddw2 v1.4s, v7.4s, v1.8h
-; CHECK-GI-NEXT: add v0.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: add v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v2.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: saddl2 v4.4s, v0.8h, v2.8h
+; CHECK-SD-BASE-NEXT: saddl v0.4s, v0.4h, v2.4h
+; CHECK-SD-BASE-NEXT: saddl2 v2.4s, v1.8h, v3.8h
+; CHECK-SD-BASE-NEXT: saddl v1.4s, v1.4h, v3.4h
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-SD-BASE-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.16b, #1
+; CHECK-SD-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: sdot v3.4s, v1.16b, v2.16b
+; CHECK-SD-DOT-NEXT: sdot v3.4s, v0.16b, v2.16b
+; CHECK-SD-DOT-NEXT: addv s0, v3.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v5.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v6.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v7.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v2.4s, v4.4s, v2.8h
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v5.4s, v0.8h
+; CHECK-GI-BASE-NEXT: saddw2 v3.4s, v6.4s, v3.8h
+; CHECK-GI-BASE-NEXT: saddw2 v1.4s, v7.4s, v1.8h
+; CHECK-GI-BASE-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -3079,40 +3222,40 @@ entry:
}
define i32 @add_pair_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: uadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v1.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v3.8b, #1
-; CHECK-DOT-NEXT: udot v2.2s, v1.8b, v3.8b
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
-; CHECK-GI-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: addv s0, v1.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v3.8b, #1
+; CHECK-SD-DOT-NEXT: udot v2.2s, v1.8b, v3.8b
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -3123,40 +3266,40 @@ entry:
}
define i32 @add_pair_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: sadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v1.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v3.8b, #1
-; CHECK-DOT-NEXT: sdot v2.2s, v1.8b, v3.8b
-; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: saddw2 v0.4s, v2.4s, v0.8h
-; CHECK-GI-NEXT: saddw2 v1.4s, v3.4s, v1.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: saddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: sadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: addv s0, v1.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v3.8b, #1
+; CHECK-SD-DOT-NEXT: sdot v2.2s, v1.8b, v3.8b
+; CHECK-SD-DOT-NEXT: sdot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-BASE-NEXT: saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -3167,37 +3310,37 @@ entry:
}
define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i8_v4i32_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i8_v4i32_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-DOT-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i8_v4i32_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -3208,44 +3351,44 @@ entry:
}
define i32 @add_pair_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i8_v4i32_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-BASE-NEXT: shl v1.4s, v1.4s, #24
-; CHECK-BASE-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-BASE-NEXT: ssra v0.4s, v1.4s, #24
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i8_v4i32_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-DOT-NEXT: shl v1.4s, v1.4s, #24
-; CHECK-DOT-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-DOT-NEXT: ssra v0.4s, v1.4s, #24
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i8_v4i32_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #24
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #24
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #24
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #24
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-SD-BASE-NEXT: shl v1.4s, v1.4s, #24
+; CHECK-SD-BASE-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-SD-BASE-NEXT: ssra v0.4s, v1.4s, #24
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-SD-DOT-NEXT: shl v1.4s, v1.4s, #24
+; CHECK-SD-DOT-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-SD-DOT-NEXT: ssra v0.4s, v1.4s, #24
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-GI-BASE-NEXT: shl v1.4s, v1.4s, #24
+; CHECK-GI-BASE-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-GI-BASE-NEXT: sshr v1.4s, v1.4s, #24
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -3256,35 +3399,35 @@ entry:
}
define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i16_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlp v1.8h, v1.16b
-; CHECK-BASE-NEXT: uadalp v1.8h, v0.16b
-; CHECK-BASE-NEXT: addv h0, v1.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i16_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlp v1.8h, v1.16b
-; CHECK-DOT-NEXT: uadalp v1.8h, v0.16b
-; CHECK-DOT-NEXT: addv h0, v1.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i16_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: uaddw2 v0.8h, v2.8h, v0.16b
-; CHECK-GI-NEXT: uaddw2 v1.8h, v3.8h, v1.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: addv h1, v1.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlp v1.8h, v1.16b
+; CHECK-SD-BASE-NEXT: uadalp v1.8h, v0.16b
+; CHECK-SD-BASE-NEXT: addv h0, v1.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlp v1.8h, v1.16b
+; CHECK-SD-DOT-NEXT: uadalp v1.8h, v0.16b
+; CHECK-SD-DOT-NEXT: addv h0, v1.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-BASE-NEXT: uaddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: addv h1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3295,35 +3438,35 @@ entry:
}
define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i16_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddlp v1.8h, v1.16b
-; CHECK-BASE-NEXT: sadalp v1.8h, v0.16b
-; CHECK-BASE-NEXT: addv h0, v1.8h
-; CHECK-BASE-NEXT: smov w0, v0.h[0]
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i16_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddlp v1.8h, v1.16b
-; CHECK-DOT-NEXT: sadalp v1.8h, v0.16b
-; CHECK-DOT-NEXT: addv h0, v1.8h
-; CHECK-DOT-NEXT: smov w0, v0.h[0]
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i16_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: saddw2 v0.8h, v2.8h, v0.16b
-; CHECK-GI-NEXT: saddw2 v1.8h, v3.8h, v1.16b
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: addv h1, v1.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxth
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddlp v1.8h, v1.16b
+; CHECK-SD-BASE-NEXT: sadalp v1.8h, v0.16b
+; CHECK-SD-BASE-NEXT: addv h0, v1.8h
+; CHECK-SD-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddlp v1.8h, v1.16b
+; CHECK-SD-DOT-NEXT: sadalp v1.8h, v0.16b
+; CHECK-SD-DOT-NEXT: addv h0, v1.8h
+; CHECK-SD-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: saddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-BASE-NEXT: saddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: addv h1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3334,31 +3477,31 @@ entry:
}
define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i16_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddl v0.8h, v0.8b, v1.8b
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i16_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddl v0.8h, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i16_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: addv h1, v1.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxth
-; CHECK-GI-NEXT: and w0, w8, #0xffff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: addv h1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -3369,31 +3512,31 @@ entry:
}
define signext i16 @add_pair_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i16_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: saddl v0.8h, v0.8b, v1.8b
-; CHECK-BASE-NEXT: addv h0, v0.8h
-; CHECK-BASE-NEXT: smov w0, v0.h[0]
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i16_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: saddl v0.8h, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addv h0, v0.8h
-; CHECK-DOT-NEXT: smov w0, v0.h[0]
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i16_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: addv h0, v0.8h
-; CHECK-GI-NEXT: addv h1, v1.8h
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxth
-; CHECK-GI-NEXT: sxth w0, w8
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: saddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-BASE-NEXT: addv h0, v0.8h
+; CHECK-SD-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: saddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: addv h0, v0.8h
+; CHECK-SD-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: addv h0, v0.8h
+; CHECK-GI-BASE-NEXT: addv h1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-BASE-NEXT: sxth w0, w8
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -3404,29 +3547,29 @@ entry:
}
define zeroext i8 @add_pair_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i8:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-BASE-NEXT: addv b0, v0.16b
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i8:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-DOT-NEXT: addv b0, v0.16b
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addv b0, v0.16b
-; CHECK-GI-NEXT: addv b1, v1.16b
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: add w8, w9, w8, uxtb
-; CHECK-GI-NEXT: and w0, w8, #0xff
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i8:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-SD-BASE-NEXT: addv b0, v0.16b
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i8:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-SD-DOT-NEXT: addv b0, v0.16b
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i8:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addv b0, v0.16b
+; CHECK-GI-BASE-NEXT: addv b1, v1.16b
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxtb
+; CHECK-GI-BASE-NEXT: and w0, w8, #0xff
+; CHECK-GI-BASE-NEXT: ret
entry:
%z1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
%z2 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %y)
@@ -3435,114 +3578,114 @@ entry:
}
define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll2 v2.8h, v0.16b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll2 v3.8h, v1.16b, #0
-; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-BASE-NEXT: ushll2 v5.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll2 v6.4s, v3.8h, #0
-; CHECK-BASE-NEXT: ushll2 v7.4s, v1.8h, #0
-; CHECK-BASE-NEXT: ushll v3.4s, v3.4h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v16.2d, v5.4s, v2.4s
-; CHECK-BASE-NEXT: uaddl v2.2d, v5.2s, v2.2s
-; CHECK-BASE-NEXT: uaddl2 v5.2d, v0.4s, v4.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v4.2s
-; CHECK-BASE-NEXT: uaddl2 v4.2d, v7.4s, v6.4s
-; CHECK-BASE-NEXT: uaddl v6.2d, v7.2s, v6.2s
-; CHECK-BASE-NEXT: uaddl2 v7.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v3.2d, v5.2d, v16.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v2.2d, v7.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v6.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll2 v2.8h, v0.16b, #0
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll2 v3.8h, v1.16b, #0
-; CHECK-DOT-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-DOT-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-DOT-NEXT: ushll2 v5.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll2 v6.4s, v3.8h, #0
-; CHECK-DOT-NEXT: ushll2 v7.4s, v1.8h, #0
-; CHECK-DOT-NEXT: ushll v3.4s, v3.4h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v16.2d, v5.4s, v2.4s
-; CHECK-DOT-NEXT: uaddl v2.2d, v5.2s, v2.2s
-; CHECK-DOT-NEXT: uaddl2 v5.2d, v0.4s, v4.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v4.2s
-; CHECK-DOT-NEXT: uaddl2 v4.2d, v7.4s, v6.4s
-; CHECK-DOT-NEXT: uaddl v6.2d, v7.2s, v6.2s
-; CHECK-DOT-NEXT: uaddl2 v7.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v3.2d, v5.2d, v16.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v2.2d, v7.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v6.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
-; CHECK-GI-NEXT: ushll v7.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v16.2d, v4.2s, #0
-; CHECK-GI-NEXT: ushll v17.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v18.2d, v5.2s, #0
-; CHECK-GI-NEXT: ushll v19.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v20.2d, v6.2s, #0
-; CHECK-GI-NEXT: ushll v21.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v22.2d, v7.2s, #0
-; CHECK-GI-NEXT: ushll v23.2d, v1.2s, #0
-; CHECK-GI-NEXT: uaddw2 v4.2d, v16.2d, v4.4s
-; CHECK-GI-NEXT: uaddw2 v2.2d, v17.2d, v2.4s
-; CHECK-GI-NEXT: uaddw2 v5.2d, v18.2d, v5.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v19.2d, v0.4s
-; CHECK-GI-NEXT: uaddw2 v6.2d, v20.2d, v6.4s
-; CHECK-GI-NEXT: uaddw2 v3.2d, v21.2d, v3.4s
-; CHECK-GI-NEXT: uaddw2 v7.2d, v22.2d, v7.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v23.2d, v1.4s
-; CHECK-GI-NEXT: add v2.2d, v4.2d, v2.2d
-; CHECK-GI-NEXT: add v0.2d, v5.2d, v0.2d
-; CHECK-GI-NEXT: add v3.2d, v6.2d, v3.2d
-; CHECK-GI-NEXT: add v1.2d, v7.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v5.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v6.4s, v3.8h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v7.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v16.2d, v5.4s, v2.4s
+; CHECK-SD-BASE-NEXT: uaddl v2.2d, v5.2s, v2.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v5.2d, v0.4s, v4.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v4.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v4.2d, v7.4s, v6.4s
+; CHECK-SD-BASE-NEXT: uaddl v6.2d, v7.2s, v6.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v7.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v3.2d, v5.2d, v16.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v3.8h, v1.16b, #0
+; CHECK-SD-DOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-DOT-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v5.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v6.4s, v3.8h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v7.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v16.2d, v5.4s, v2.4s
+; CHECK-SD-DOT-NEXT: uaddl v2.2d, v5.2s, v2.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v5.2d, v0.4s, v4.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v4.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v4.2d, v7.4s, v6.4s
+; CHECK-SD-DOT-NEXT: uaddl v6.2d, v7.2s, v6.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v7.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v3.2d, v5.2d, v16.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v6.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v7.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v16.2d, v4.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v17.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v18.2d, v5.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v19.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v20.2d, v6.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v21.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v22.2d, v7.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v23.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v4.2d, v16.2d, v4.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v2.2d, v17.2d, v2.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v5.2d, v18.2d, v5.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v19.2d, v0.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v6.2d, v20.2d, v6.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v3.2d, v21.2d, v3.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v7.2d, v22.2d, v7.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v23.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v2.2d, v4.2d, v2.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v5.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v3.2d, v6.2d, v3.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v7.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -3553,114 +3696,114 @@ entry:
}
define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v16i8_v16i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll2 v2.8h, v0.16b, #0
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll2 v3.8h, v1.16b, #0
-; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-BASE-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-BASE-NEXT: sshll2 v5.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: sshll2 v6.4s, v3.8h, #0
-; CHECK-BASE-NEXT: sshll2 v7.4s, v1.8h, #0
-; CHECK-BASE-NEXT: sshll v3.4s, v3.4h, #0
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: saddl2 v16.2d, v5.4s, v2.4s
-; CHECK-BASE-NEXT: saddl v2.2d, v5.2s, v2.2s
-; CHECK-BASE-NEXT: saddl2 v5.2d, v0.4s, v4.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v4.2s
-; CHECK-BASE-NEXT: saddl2 v4.2d, v7.4s, v6.4s
-; CHECK-BASE-NEXT: saddl v6.2d, v7.2s, v6.2s
-; CHECK-BASE-NEXT: saddl2 v7.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v3.2d, v5.2d, v16.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-BASE-NEXT: add v2.2d, v7.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v6.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v16i8_v16i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll2 v2.8h, v0.16b, #0
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll2 v3.8h, v1.16b, #0
-; CHECK-DOT-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-DOT-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-DOT-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-DOT-NEXT: sshll2 v5.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: sshll2 v6.4s, v3.8h, #0
-; CHECK-DOT-NEXT: sshll2 v7.4s, v1.8h, #0
-; CHECK-DOT-NEXT: sshll v3.4s, v3.4h, #0
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: saddl2 v16.2d, v5.4s, v2.4s
-; CHECK-DOT-NEXT: saddl v2.2d, v5.2s, v2.2s
-; CHECK-DOT-NEXT: saddl2 v5.2d, v0.4s, v4.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v4.2s
-; CHECK-DOT-NEXT: saddl2 v4.2d, v7.4s, v6.4s
-; CHECK-DOT-NEXT: saddl v6.2d, v7.2s, v6.2s
-; CHECK-DOT-NEXT: saddl2 v7.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v3.2d, v5.2d, v16.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-DOT-NEXT: add v2.2d, v7.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v6.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v16i8_v16i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-GI-NEXT: sshll2 v1.8h, v1.16b, #0
-; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
-; CHECK-GI-NEXT: sshll v7.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: sshll v16.2d, v4.2s, #0
-; CHECK-GI-NEXT: sshll v17.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v18.2d, v5.2s, #0
-; CHECK-GI-NEXT: sshll v19.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v20.2d, v6.2s, #0
-; CHECK-GI-NEXT: sshll v21.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v22.2d, v7.2s, #0
-; CHECK-GI-NEXT: sshll v23.2d, v1.2s, #0
-; CHECK-GI-NEXT: saddw2 v4.2d, v16.2d, v4.4s
-; CHECK-GI-NEXT: saddw2 v2.2d, v17.2d, v2.4s
-; CHECK-GI-NEXT: saddw2 v5.2d, v18.2d, v5.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v19.2d, v0.4s
-; CHECK-GI-NEXT: saddw2 v6.2d, v20.2d, v6.4s
-; CHECK-GI-NEXT: saddw2 v3.2d, v21.2d, v3.4s
-; CHECK-GI-NEXT: saddw2 v7.2d, v22.2d, v7.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v23.2d, v1.4s
-; CHECK-GI-NEXT: add v2.2d, v4.2d, v2.2d
-; CHECK-GI-NEXT: add v0.2d, v5.2d, v0.2d
-; CHECK-GI-NEXT: add v3.2d, v6.2d, v3.2d
-; CHECK-GI-NEXT: add v1.2d, v7.2d, v1.2d
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll2 v2.8h, v0.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.8h, v1.16b, #0
+; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v5.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v6.4s, v3.8h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v7.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v3.4s, v3.4h, #0
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v16.2d, v5.4s, v2.4s
+; CHECK-SD-BASE-NEXT: saddl v2.2d, v5.2s, v2.2s
+; CHECK-SD-BASE-NEXT: saddl2 v5.2d, v0.4s, v4.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v4.2s
+; CHECK-SD-BASE-NEXT: saddl2 v4.2d, v7.4s, v6.4s
+; CHECK-SD-BASE-NEXT: saddl v6.2d, v7.2s, v6.2s
+; CHECK-SD-BASE-NEXT: saddl2 v7.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v3.2d, v5.2d, v16.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll2 v2.8h, v0.16b, #0
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v3.8h, v1.16b, #0
+; CHECK-SD-DOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-DOT-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v5.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v6.4s, v3.8h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v7.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v3.4s, v3.4h, #0
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v16.2d, v5.4s, v2.4s
+; CHECK-SD-DOT-NEXT: saddl v2.2d, v5.2s, v2.2s
+; CHECK-SD-DOT-NEXT: saddl2 v5.2d, v0.4s, v4.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v4.2s
+; CHECK-SD-DOT-NEXT: saddl2 v4.2d, v7.4s, v6.4s
+; CHECK-SD-DOT-NEXT: saddl v6.2d, v7.2s, v6.2s
+; CHECK-SD-DOT-NEXT: saddl2 v7.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v3.2d, v5.2d, v16.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-GI-BASE-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v5.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v6.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v3.4s, v3.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v7.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v16.2d, v4.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v17.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v18.2d, v5.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v19.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v20.2d, v6.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v21.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v22.2d, v7.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v23.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v4.2d, v16.2d, v4.4s
+; CHECK-GI-BASE-NEXT: saddw2 v2.2d, v17.2d, v2.4s
+; CHECK-GI-BASE-NEXT: saddw2 v5.2d, v18.2d, v5.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v19.2d, v0.4s
+; CHECK-GI-BASE-NEXT: saddw2 v6.2d, v20.2d, v6.4s
+; CHECK-GI-BASE-NEXT: saddw2 v3.2d, v21.2d, v3.4s
+; CHECK-GI-BASE-NEXT: saddw2 v7.2d, v22.2d, v7.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v23.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v2.2d, v4.2d, v2.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v5.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v3.2d, v6.2d, v3.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v7.2d, v1.2d
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -3671,68 +3814,68 @@ entry:
}
define i64 @add_pair_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll2 v3.4s, v1.8h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-BASE-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-DOT-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll2 v3.4s, v1.8h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-DOT-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v1.2s, #0
-; CHECK-GI-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
-; CHECK-GI-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-BASE-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-DOT-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-DOT-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v5.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v7.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -3743,68 +3886,68 @@ entry:
}
define i64 @add_pair_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-BASE-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: sshll2 v3.4s, v1.8h, #0
-; CHECK-BASE-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: saddl2 v4.2d, v0.4s, v2.4s
-; CHECK-BASE-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-BASE-NEXT: saddl2 v2.2d, v1.4s, v3.4s
-; CHECK-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-BASE-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-DOT-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-DOT-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-DOT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: sshll2 v3.4s, v1.8h, #0
-; CHECK-DOT-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: saddl2 v4.2d, v0.4s, v2.4s
-; CHECK-DOT-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-DOT-NEXT: saddl2 v2.2d, v1.4s, v3.4s
-; CHECK-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-DOT-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v1.2s, #0
-; CHECK-GI-NEXT: saddw2 v2.2d, v4.2d, v2.4s
-; CHECK-GI-NEXT: saddw2 v0.2d, v5.2d, v0.4s
-; CHECK-GI-NEXT: saddw2 v3.2d, v6.2d, v3.4s
-; CHECK-GI-NEXT: saddw2 v1.2d, v7.2d, v1.4s
-; CHECK-GI-NEXT: add v0.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-BASE-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-BASE-NEXT: saddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-BASE-NEXT: saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-BASE-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-BASE-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-DOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-DOT-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-SD-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-SD-DOT-NEXT: saddl v0.2d, v0.2s, v2.2s
+; CHECK-SD-DOT-NEXT: saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-SD-DOT-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-DOT-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v5.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-BASE-NEXT: sshll v7.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-BASE-NEXT: saddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-BASE-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-BASE-NEXT: saddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-BASE-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -3815,51 +3958,51 @@ entry:
}
define i64 @add_pair_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i8_v4i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-BASE-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-BASE-NEXT: uadalp v1.2d, v0.4s
-; CHECK-BASE-NEXT: addp d0, v1.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i8_v4i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-DOT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: uaddlp v1.2d, v1.4s
-; CHECK-DOT-NEXT: uadalp v1.2d, v0.4s
-; CHECK-DOT-NEXT: addp d0, v1.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i8_v4i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: movi v2.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT: ushll v4.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
-; CHECK-GI-NEXT: and v3.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: add v1.2d, v4.2d, v1.2d
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-BASE-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-BASE-NEXT: addp d0, v1.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: uaddlp v1.2d, v1.4s
+; CHECK-SD-DOT-NEXT: uadalp v1.2d, v0.4s
+; CHECK-SD-DOT-NEXT: addp d0, v1.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: ushll v4.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-BASE-NEXT: and v3.16b, v3.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v4.16b, v4.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-BASE-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-BASE-NEXT: add v1.2d, v4.2d, v1.2d
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -3870,70 +4013,70 @@ entry:
}
define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v4i8_v4i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BASE-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-BASE-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-BASE-NEXT: ushll v3.2d, v1.2s, #0
-; CHECK-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-BASE-NEXT: ushll2 v1.2d, v1.4s, #0
-; CHECK-BASE-NEXT: shl v2.2d, v2.2d, #56
-; CHECK-BASE-NEXT: shl v3.2d, v3.2d, #56
-; CHECK-BASE-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-BASE-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: sshr v2.2d, v2.2d, #56
-; CHECK-BASE-NEXT: sshr v3.2d, v3.2d, #56
-; CHECK-BASE-NEXT: ssra v2.2d, v0.2d, #56
-; CHECK-BASE-NEXT: ssra v3.2d, v1.2d, #56
-; CHECK-BASE-NEXT: add v0.2d, v2.2d, v3.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v4i8_v4i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-DOT-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-DOT-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-DOT-NEXT: ushll v3.2d, v1.2s, #0
-; CHECK-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-DOT-NEXT: ushll2 v1.2d, v1.4s, #0
-; CHECK-DOT-NEXT: shl v2.2d, v2.2d, #56
-; CHECK-DOT-NEXT: shl v3.2d, v3.2d, #56
-; CHECK-DOT-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-DOT-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: sshr v2.2d, v2.2d, #56
-; CHECK-DOT-NEXT: sshr v3.2d, v3.2d, #56
-; CHECK-DOT-NEXT: ssra v2.2d, v0.2d, #56
-; CHECK-DOT-NEXT: ssra v3.2d, v1.2d, #56
-; CHECK-DOT-NEXT: add v0.2d, v2.2d, v3.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v4i8_v4i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v2.2d, v0.4s, #0
-; CHECK-GI-NEXT: ushll2 v3.2d, v1.4s, #0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: shl v2.2d, v2.2d, #56
-; CHECK-GI-NEXT: shl v3.2d, v3.2d, #56
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: sshr v2.2d, v2.2d, #56
-; CHECK-GI-NEXT: sshr v3.2d, v3.2d, #56
-; CHECK-GI-NEXT: ssra v2.2d, v0.2d, #56
-; CHECK-GI-NEXT: ssra v3.2d, v1.2d, #56
-; CHECK-GI-NEXT: addp d0, v2.2d
-; CHECK-GI-NEXT: addp d1, v3.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-BASE-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-SD-BASE-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-BASE-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-SD-BASE-NEXT: shl v2.2d, v2.2d, #56
+; CHECK-SD-BASE-NEXT: shl v3.2d, v3.2d, #56
+; CHECK-SD-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: sshr v2.2d, v2.2d, #56
+; CHECK-SD-BASE-NEXT: sshr v3.2d, v3.2d, #56
+; CHECK-SD-BASE-NEXT: ssra v2.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: ssra v3.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: add v0.2d, v2.2d, v3.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-DOT-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-SD-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-DOT-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-SD-DOT-NEXT: shl v2.2d, v2.2d, #56
+; CHECK-SD-DOT-NEXT: shl v3.2d, v3.2d, #56
+; CHECK-SD-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: sshr v2.2d, v2.2d, #56
+; CHECK-SD-DOT-NEXT: sshr v3.2d, v3.2d, #56
+; CHECK-SD-DOT-NEXT: ssra v2.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: ssra v3.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: add v0.2d, v2.2d, v3.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-BASE-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: shl v2.2d, v2.2d, #56
+; CHECK-GI-BASE-NEXT: shl v3.2d, v3.2d, #56
+; CHECK-GI-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v2.2d, v2.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v3.2d, v3.2d, #56
+; CHECK-GI-BASE-NEXT: ssra v2.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: ssra v3.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: addp d0, v2.2d
+; CHECK-GI-BASE-NEXT: addp d1, v3.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -3944,39 +4087,39 @@ entry:
}
define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i8_v2i64_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: movi d2, #0x0000ff000000ff
-; CHECK-BASE-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-BASE-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i8_v2i64_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi d2, #0x0000ff000000ff
-; CHECK-DOT-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-DOT-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i8_v2i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2d, #0x000000000000ff
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: movi d2, #0x0000ff000000ff
+; CHECK-SD-BASE-NEXT: and v0.8b, v0.8b, v2.8b
+; CHECK-SD-BASE-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-BASE-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi d2, #0x0000ff000000ff
+; CHECK-SD-DOT-NEXT: and v0.8b, v0.8b, v2.8b
+; CHECK-SD-DOT-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-DOT-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-BASE-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -3987,44 +4130,44 @@ entry:
}
define i64 @add_pair_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i8_v2i64_sext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BASE-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-BASE-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-BASE-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-BASE-NEXT: sshr v0.2d, v0.2d, #56
-; CHECK-BASE-NEXT: ssra v0.2d, v1.2d, #56
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i8_v2i64_sext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-DOT-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-DOT-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-DOT-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-DOT-NEXT: sshr v0.2d, v0.2d, #56
-; CHECK-DOT-NEXT: ssra v0.2d, v1.2d, #56
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i8_v2i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-SD-BASE-NEXT: ssra v0.2d, v1.2d, #56
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-SD-DOT-NEXT: ssra v0.2d, v1.2d, #56
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-BASE-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-BASE-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-GI-BASE-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%xx = sext <2 x i8> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -4035,61 +4178,61 @@ entry:
}
define i32 @add_pair_v8i8_v8i32_double_sext_zext(<8 x i8> %ax, <8 x i8> %ay, <8 x i8> %bx, <8 x i8> %by) {
-; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: sshll v3.8h, v3.8b, #0
-; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: saddlp v3.4s, v3.8h
-; CHECK-BASE-NEXT: uadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: sadalp v3.4s, v2.8h
-; CHECK-BASE-NEXT: add v0.4s, v3.4s, v1.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v5.8b, #1
-; CHECK-DOT-NEXT: movi v6.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v6.2s, v1.8b, v5.8b
-; CHECK-DOT-NEXT: sdot v4.2s, v3.8b, v5.8b
-; CHECK-DOT-NEXT: udot v6.2s, v0.8b, v5.8b
-; CHECK-DOT-NEXT: sdot v4.2s, v2.8b, v5.8b
-; CHECK-DOT-NEXT: add v0.2s, v6.2s, v4.2s
-; CHECK-DOT-NEXT: addp v0.2s, v0.2s, v0.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-GI-NEXT: sshll v3.8h, v3.8b, #0
-; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v6.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll v7.4s, v3.4h, #0
-; CHECK-GI-NEXT: uaddw2 v0.4s, v4.4s, v0.8h
-; CHECK-GI-NEXT: uaddw2 v1.4s, v5.4s, v1.8h
-; CHECK-GI-NEXT: saddw2 v2.4s, v6.4s, v2.8h
-; CHECK-GI-NEXT: saddw2 v3.4s, v7.4s, v3.8h
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: addv s2, v2.4s
-; CHECK-GI-NEXT: addv s3, v3.4s
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: add w8, w8, w9
-; CHECK-GI-NEXT: add w9, w10, w11
-; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v3.8h, v3.8b, #0
+; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-BASE-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: saddlp v3.4s, v3.8h
+; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: sadalp v3.4s, v2.8h
+; CHECK-SD-BASE-NEXT: add v0.4s, v3.4s, v1.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v5.8b, #1
+; CHECK-SD-DOT-NEXT: movi v6.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: udot v6.2s, v1.8b, v5.8b
+; CHECK-SD-DOT-NEXT: sdot v4.2s, v3.8b, v5.8b
+; CHECK-SD-DOT-NEXT: udot v6.2s, v0.8b, v5.8b
+; CHECK-SD-DOT-NEXT: sdot v4.2s, v2.8b, v5.8b
+; CHECK-SD-DOT-NEXT: add v0.2s, v6.2s, v4.2s
+; CHECK-SD-DOT-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-GI-BASE-NEXT: sshll v3.8h, v3.8b, #0
+; CHECK-GI-BASE-NEXT: ushll v4.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v6.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: sshll v7.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: uaddw2 v0.4s, v4.4s, v0.8h
+; CHECK-GI-BASE-NEXT: uaddw2 v1.4s, v5.4s, v1.8h
+; CHECK-GI-BASE-NEXT: saddw2 v2.4s, v6.4s, v2.8h
+; CHECK-GI-BASE-NEXT: saddw2 v3.4s, v7.4s, v3.8h
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: addv s1, v1.4s
+; CHECK-GI-BASE-NEXT: addv s2, v2.4s
+; CHECK-GI-BASE-NEXT: addv s3, v3.4s
+; CHECK-GI-BASE-NEXT: fmov w8, s0
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: fmov w10, s2
+; CHECK-GI-BASE-NEXT: fmov w11, s3
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: add w9, w10, w11
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
entry:
%axx = zext <8 x i8> %ax to <8 x i32>
%az1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %axx)
@@ -4106,48 +4249,48 @@ entry:
}
define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
-; CHECK-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-BASE-NEXT: uaddlp v3.4s, v3.8h
-; CHECK-BASE-NEXT: uadalp v1.4s, v0.8h
-; CHECK-BASE-NEXT: uadalp v3.4s, v2.8h
-; CHECK-BASE-NEXT: add v0.4s, v3.4s, v1.4s
-; CHECK-BASE-NEXT: addv s0, v0.4s
-; CHECK-BASE-NEXT: fmov w0, s0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-DOT-NEXT: uaddlp v3.4s, v3.8h
-; CHECK-DOT-NEXT: uadalp v1.4s, v0.8h
-; CHECK-DOT-NEXT: uadalp v3.4s, v2.8h
-; CHECK-DOT-NEXT: add v0.4s, v3.4s, v1.4s
-; CHECK-DOT-NEXT: addv s0, v0.4s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0
-; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
-; CHECK-GI-NEXT: add v0.4s, v4.4s, v0.4s
-; CHECK-GI-NEXT: add v1.4s, v5.4s, v1.4s
-; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s
-; CHECK-GI-NEXT: add v3.4s, v7.4s, v3.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-BASE-NEXT: uaddlp v3.4s, v3.8h
+; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-BASE-NEXT: uadalp v3.4s, v2.8h
+; CHECK-SD-BASE-NEXT: add v0.4s, v3.4s, v1.4s
+; CHECK-SD-BASE-NEXT: addv s0, v0.4s
+; CHECK-SD-BASE-NEXT: fmov w0, s0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-SD-DOT-NEXT: uaddlp v3.4s, v3.8h
+; CHECK-SD-DOT-NEXT: uadalp v1.4s, v0.8h
+; CHECK-SD-DOT-NEXT: uadalp v3.4s, v2.8h
+; CHECK-SD-DOT-NEXT: add v0.4s, v3.4s, v1.4s
+; CHECK-SD-DOT-NEXT: addv s0, v0.4s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: ushll v4.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v6.4s, v2.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-GI-BASE-NEXT: ushll v7.4s, v3.4h, #0
+; CHECK-GI-BASE-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
+; CHECK-GI-BASE-NEXT: add v1.4s, v5.4s, v1.4s
+; CHECK-GI-BASE-NEXT: add v2.4s, v6.4s, v2.4s
+; CHECK-GI-BASE-NEXT: add v3.4s, v7.4s, v3.4s
+; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
+; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-BASE-NEXT: addv s0, v0.4s
+; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ret
entry:
%axx = zext <8 x i16> %ax to <8 x i32>
%s1h = shufflevector <8 x i32> %axx, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -4173,28 +4316,28 @@ entry:
}
define i64 @add_pair_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-BASE-LABEL: add_pair_v2i64_v2i64:
-; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-BASE-NEXT: addp d0, v0.2d
-; CHECK-BASE-NEXT: fmov x0, d0
-; CHECK-BASE-NEXT: ret
-;
-; CHECK-DOT-LABEL: add_pair_v2i64_v2i64:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-DOT-NEXT: addp d0, v0.2d
-; CHECK-DOT-NEXT: fmov x0, d0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: add_pair_v2i64_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: addp d0, v0.2d
-; CHECK-GI-NEXT: addp d1, v1.2d
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: add x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-SD-BASE-LABEL: add_pair_v2i64_v2i64:
+; CHECK-SD-BASE: // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-BASE-NEXT: addp d0, v0.2d
+; CHECK-SD-BASE-NEXT: fmov x0, d0
+; CHECK-SD-BASE-NEXT: ret
+;
+; CHECK-SD-DOT-LABEL: add_pair_v2i64_v2i64:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-DOT-NEXT: addp d0, v0.2d
+; CHECK-SD-DOT-NEXT: fmov x0, d0
+; CHECK-SD-DOT-NEXT: ret
+;
+; CHECK-GI-BASE-LABEL: add_pair_v2i64_v2i64:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: addp d0, v0.2d
+; CHECK-GI-BASE-NEXT: addp d1, v1.2d
+; CHECK-GI-BASE-NEXT: fmov x8, d0
+; CHECK-GI-BASE-NEXT: fmov x9, d1
+; CHECK-GI-BASE-NEXT: add x0, x8, x9
+; CHECK-GI-BASE-NEXT: ret
entry:
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
%z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %y)
@@ -4257,117 +4400,61 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
-; CHECK-DOT-LABEL: full:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: ldr d0, [x0]
-; CHECK-DOT-NEXT: ldr d1, [x2]
-; CHECK-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
-; CHECK-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-DOT-NEXT: sxtw x8, w3
-; CHECK-DOT-NEXT: sxtw x9, w1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: movi v3.8b, #1
-; CHECK-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: add x11, x2, x8
-; CHECK-DOT-NEXT: add x10, x0, x9
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x11, x11, x8
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: add x10, x10, x9
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x10, x10, x9
-; CHECK-DOT-NEXT: add x11, x11, x8
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x10, x10, x9
-; CHECK-DOT-NEXT: add x11, x11, x8
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x10, x10, x9
-; CHECK-DOT-NEXT: add x11, x11, x8
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x10, x10, x9
-; CHECK-DOT-NEXT: add x11, x11, x8
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
-; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: ldr d1, [x10, x9]
-; CHECK-DOT-NEXT: ldr d4, [x11, x8]
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-DOT-NEXT: fmov w0, s0
-; CHECK-DOT-NEXT: ret
-;
-; CHECK-GI-LABEL: full:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr d0, [x0]
-; CHECK-GI-NEXT: ldr d1, [x2]
-; CHECK-GI-NEXT: // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: sxtw x8, w3
-; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: movi v3.8b, #1
-; CHECK-GI-NEXT: uabd v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: add x11, x2, x8
-; CHECK-GI-NEXT: add x10, x0, x9
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: ldr d1, [x10, x9]
-; CHECK-GI-NEXT: ldr d4, [x11, x8]
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-SD-DOT-LABEL: full:
+; CHECK-SD-DOT: // %bb.0: // %entry
+; CHECK-SD-DOT-NEXT: ldr d0, [x0]
+; CHECK-SD-DOT-NEXT: ldr d1, [x2]
+; CHECK-SD-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
+; CHECK-SD-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-DOT-NEXT: sxtw x8, w3
+; CHECK-SD-DOT-NEXT: sxtw x9, w1
+; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-DOT-NEXT: movi v3.8b, #1
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
+; CHECK-SD-DOT-NEXT: add x11, x2, x8
+; CHECK-SD-DOT-NEXT: add x10, x0, x9
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: add x11, x11, x8
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: add x10, x10, x9
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: add x10, x10, x9
+; CHECK-SD-DOT-NEXT: add x11, x11, x8
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: add x10, x10, x9
+; CHECK-SD-DOT-NEXT: add x11, x11, x8
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: add x10, x10, x9
+; CHECK-SD-DOT-NEXT: add x11, x11, x8
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: add x10, x10, x9
+; CHECK-SD-DOT-NEXT: add x11, x11, x8
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11]
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: ldr d1, [x10, x9]
+; CHECK-SD-DOT-NEXT: ldr d4, [x11, x8]
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-SD-DOT-NEXT: fmov w0, s0
+; CHECK-SD-DOT-NEXT: ret
entry:
%idx.ext8 = sext i32 %s2 to i64
%idx.ext = sext i32 %s1 to i64
>From 5c03471342c86606a5cdc7707c2e57cb72c06730 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Mon, 30 Oct 2023 09:51:47 +0000
Subject: [PATCH 2/2] [AArch64][GlobalISel] Support udot lowering for vecreduce
add
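vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add(udot(x, y))
vecreduce_add(ext(x)) -> vecreduce_add(udot(x, 1))
(sdot is used instead of udot when the extends are sign extends.)
Applies to source vectors with 8-bit elements whose element count is a multiple of 8.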
---
llvm/lib/Target/AArch64/AArch64Combine.td | 12 +-
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 15 +
.../GISel/AArch64PreLegalizerCombiner.cpp | 140 ++
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 1433 +++++++++++++++--
4 files changed, 1481 insertions(+), 119 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 017c4523c23a184..e17524b2c55bdd3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -33,12 +33,22 @@ def fold_global_offset : GICombineRule<
(apply [{ applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
>;
+// Pre-legalizer combine rooted at G_VECREDUCE_ADD. The match step calls
+// matchExtAddvToUdotAddv on the root; when it returns true the apply step
+// calls applyExtAddvToUdotAddv, which rebuilds the reduction on top of
+// G_UDOT/G_SDOT.
+// NOTE(review): the rule is wrapped in `Predicates = [HasDotProd]`, presumably
+// to restrict it to subtargets with the dot-product feature -- confirm that
+// the combiner emitter honours Predicates on GICombineRule.
+let Predicates = [HasDotProd] in {
+def ext_addv_to_udot_addv : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_VECREDUCE_ADD):$root,
+ [{ return matchExtAddvToUdotAddv(*${root}, MRI); }]),
+ (apply [{ applyExtAddvToUdotAddv(*${root}, MRI, B, Observer); }])
+>;
+}
+
def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
fconstant_to_constant,
icmp_redundant_trunc,
fold_global_offset,
- shuffle_to_extract]> {
+ shuffle_to_extract,
+ ext_addv_to_udot_addv]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 27338bd24393325..1711360779bf74c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -227,6 +227,18 @@ def G_SMULL : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+// Generic unsigned dot-product node with one result and three sources and no
+// side effects. The pre-legalizer combine builds it as
+// (zero vector, 8-bit source vector, 8-bit source vector).
+// NOTE(review): every operand is declared type0, but the combine creates this
+// node with a 32-bit-element result/$src1 and 8-bit-element $src2/$src3 --
+// confirm whether $src2/$src3 should use a separate type index.
+def G_UDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
+// Generic signed dot-product node with one result and three sources and no
+// side effects. The pre-legalizer combine selects it instead of G_UDOT when
+// the extends feeding the reduction are G_SEXT.
+// NOTE(review): every operand is declared type0, but the combine creates this
+// node with a 32-bit-element result/$src1 and 8-bit-element $src2/$src3 --
+// confirm whether $src2/$src3 should use a separate type index.
+def G_SDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -270,6 +282,9 @@ def : GINodeEquiv<G_BSP, AArch64bsp>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;
+def : GINodeEquiv<G_UDOT, AArch64udot>;
+def : GINodeEquiv<G_SDOT, AArch64sdot>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index d9678bea214dd53..34a59839a99a97c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -228,6 +228,146 @@ void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
}
+// Matches a G_VECREDUCE_ADD that can be rewritten on top of a dot product:
+//   vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add(dot(0, x, y))
+//   vecreduce_add(ext(x))              -> vecreduce_add(dot(0, x, 1))
+// Both extends must be of the same kind (G_ZEXT or G_SEXT), the reduction
+// must produce a 32-bit result from 32-bit elements, and the extend sources
+// must be vectors of 8-bit elements with an element count that is a multiple
+// of 8.
+// Similar to performVecReduceAddCombine in SelectionDAG
+//
+// Returns true when MI matches one of the two shapes above.
+bool matchExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
+         "Expected a G_VECREDUCE_ADD instruction");
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register MidReg = MI.getOperand(1).getReg();
+  if (MRI.getType(DstReg).getScalarSizeInBits() != 32 ||
+      MRI.getType(MidReg).getScalarSizeInBits() != 32)
+    return false;
+
+  MachineInstr *I1 = getDefIgnoringCopies(MidReg, MRI);
+  if (!I1)
+    return false;
+
+  // The opcode has to be validated before operand 1 is read: an arbitrary
+  // defining instruction may have no operand 1, or a non-register one.
+  auto IsExt = [](const MachineInstr *Ext) {
+    return Ext && (Ext->getOpcode() == TargetOpcode::G_ZEXT ||
+                   Ext->getOpcode() == TargetOpcode::G_SEXT);
+  };
+
+  LLT SrcTy;
+  if (I1->getOpcode() == TargetOpcode::G_MUL) {
+    MachineInstr *ExtMI1 =
+        getDefIgnoringCopies(I1->getOperand(1).getReg(), MRI);
+    MachineInstr *ExtMI2 =
+        getDefIgnoringCopies(I1->getOperand(2).getReg(), MRI);
+    if (!IsExt(ExtMI1) || !IsExt(ExtMI2) ||
+        ExtMI1->getOpcode() != ExtMI2->getOpcode())
+      return false;
+
+    // Both multiplicands feed the same dot instruction, so their
+    // pre-extension types must agree. The extended types are equal by
+    // construction (they are operands of one G_MUL) and say nothing about
+    // the sources.
+    SrcTy = MRI.getType(ExtMI1->getOperand(1).getReg());
+    if (SrcTy != MRI.getType(ExtMI2->getOperand(1).getReg()))
+      return false;
+  } else if (IsExt(I1)) {
+    SrcTy = MRI.getType(I1->getOperand(1).getReg());
+  } else {
+    return false;
+  }
+
+  return SrcTy.isVector() && SrcTy.getScalarSizeInBits() == 8 &&
+         SrcTy.getNumElements() % 8 == 0;
+}
+
+// Rewrites a G_VECREDUCE_ADD accepted by matchExtAddvToUdotAddv so that it
+// reduces the result of G_UDOT (zero extends) or G_SDOT (sign extends):
+//  * for mul(ext(x), ext(y)) the dot operands are x and y;
+//  * for ext(x) the dot operands are x and a constant 1 of x's type.
+// Sources wider than a single dot are unmerged into equally sized pieces
+// (16 x s8 when the element count is a multiple of 16, otherwise 8 x s8), one
+// dot with a zero accumulator is built per piece, and the partial results are
+// concatenated and reduced with a new G_VECREDUCE_ADD that defines MI's
+// result register. MI is erased; the mul/ext producers are erased only when
+// nothing else uses them.
+void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
+                            MachineIRBuilder &Builder,
+                            GISelChangeObserver &Observer) {
+  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
+         "Expected a G_VECREDUCE_ADD instruction");
+  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (!I1)
+    return;
+
+  // Identify the extend(s) and the 8-bit source register(s).
+  MachineInstr *Ext1MI = nullptr;
+  MachineInstr *Ext2MI = nullptr;
+  Register Ext1SrcReg, Ext2SrcReg;
+  bool IsZExt = false;
+  const bool IsMul = I1->getOpcode() == TargetOpcode::G_MUL;
+  if (IsMul) {
+    Ext1MI = getDefIgnoringCopies(I1->getOperand(1).getReg(), MRI);
+    Ext2MI = getDefIgnoringCopies(I1->getOperand(2).getReg(), MRI);
+    if (!Ext1MI || !Ext2MI)
+      return;
+    Ext1SrcReg = Ext1MI->getOperand(1).getReg();
+    Ext2SrcReg = Ext2MI->getOperand(1).getReg();
+    IsZExt = Ext1MI->getOpcode() == TargetOpcode::G_ZEXT;
+  } else if (I1->getOpcode() == TargetOpcode::G_ZEXT ||
+             I1->getOpcode() == TargetOpcode::G_SEXT) {
+    Ext1SrcReg = I1->getOperand(1).getReg();
+    IsZExt = I1->getOpcode() == TargetOpcode::G_ZEXT;
+  } else {
+    return;
+  }
+  const unsigned DotOpcode = IsZExt ? AArch64::G_UDOT : AArch64::G_SDOT;
+
+  // Choose one piece size for the whole source. G_UNMERGE_VALUES and
+  // G_CONCAT_VECTORS operate on equally typed pieces, so a source that is not
+  // a multiple of 16 elements is split into 8-element pieces throughout.
+  LLT SrcTy = MRI.getType(Ext1SrcReg);
+  if (!SrcTy.isVector())
+    return;
+  const unsigned NumSrcElts = SrcTy.getNumElements();
+  unsigned PieceElts;
+  if (NumSrcElts % 16 == 0)
+    PieceElts = 16;
+  else if (NumSrcElts % 8 == 0)
+    PieceElts = 8;
+  else
+    return;
+  const unsigned NumDots = NumSrcElts / PieceElts;
+  const LLT PieceTy = LLT::fixed_vector(PieceElts, 8);
+  // Each 32-bit accumulator lane sums four 8-bit products.
+  const LLT AccTy = LLT::fixed_vector(PieceElts / 4, 32);
+
+  // A plain extend is treated as a multiplication by one. The constant is
+  // built only after every bail-out above so no stray instruction is left
+  // behind.
+  if (!IsMul)
+    Ext2SrcReg = Builder.buildConstant(SrcTy, 1)->getOperand(0).getReg();
+
+  if (NumDots == 1) {
+    // The whole source fits into a single dot instruction.
+    Register Zeroes = Builder.buildConstant(AccTy, 0)->getOperand(0).getReg();
+    auto Dot = Builder.buildInstr(DotOpcode, {AccTy},
+                                  {Zeroes, Ext1SrcReg, Ext2SrcReg});
+    Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
+                              Dot->getOperand(0).getReg());
+  } else {
+    // Unmerge both sources into NumDots equally typed pieces.
+    SmallVector<LLT, 4> PieceTys(NumDots, PieceTy);
+    auto Ext1Unmerge = Builder.buildUnmerge(PieceTys, Ext1SrcReg);
+    auto Ext2Unmerge = Builder.buildUnmerge(PieceTys, Ext2SrcReg);
+
+    // Build one dot per piece, each accumulating into a fresh zero vector.
+    SmallVector<Register, 4> DotRegs;
+    for (unsigned I = 0; I != NumDots; ++I) {
+      Register Zeroes =
+          Builder.buildConstant(AccTy, 0)->getOperand(0).getReg();
+      DotRegs.push_back(Builder
+                            .buildInstr(DotOpcode, {AccTy},
+                                        {Zeroes, Ext1Unmerge.getReg(I),
+                                         Ext2Unmerge.getReg(I)})
+                            ->getOperand(0)
+                            .getReg());
+    }
+
+    // Concatenate the partial sums and reduce them in one go.
+    auto ConcatMI = Builder.buildConcatVectors(
+        LLT::fixed_vector(NumDots * AccTy.getNumElements(), 32), DotRegs);
+    Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
+                              ConcatMI->getOperand(0).getReg());
+  }
+
+  // The old reduction is always dead now. Its producers may still have other
+  // users, and for mul(ext(x), ext(x)) both multiplicands are the same
+  // instruction, so each producer is erased at most once and only when its
+  // result has no remaining non-debug use.
+  MI.eraseFromParent();
+  auto EraseIfDead = [&MRI](MachineInstr *Def) {
+    if (Def && MRI.use_nodbg_empty(Def->getOperand(0).getReg()))
+      Def->eraseFromParent();
+  };
+  EraseIfDead(I1);
+  EraseIfDead(Ext1MI);
+  if (Ext2MI != Ext1MI)
+    EraseIfDead(Ext2MI);
+}
+
bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
CombinerHelper &Helper, GISelChangeObserver &Observer) {
// Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 2a512aaf9b2cc8d..c966173683e1161 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE,CHECK-SD-BASE
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-DOT
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-SD-DOT
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-BASE,CHECK-GI-BASE
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-GI-DOT
+; CHECK-GI-BASE: warning: Instruction selection used fallback path for full
+; CHECK-GI-DOT: warning: Instruction selection used fallback path for full
define i32 @addv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: addv_v2i32:
@@ -68,6 +71,14 @@ define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i32_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -94,6 +105,14 @@ define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i32_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -146,6 +165,14 @@ define i32 @add_v8i16_v8i32_zext(<8 x i16> %x) {
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -172,6 +199,14 @@ define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -223,6 +258,13 @@ define zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: uxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i16:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: uxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
ret i16 %z
@@ -263,6 +305,19 @@ define i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -304,6 +359,19 @@ define i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -333,6 +401,15 @@ define i64 @add_v4i16_v4i64_zext(<4 x i16> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i16_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -362,6 +439,15 @@ define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i16_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -395,6 +481,15 @@ define i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v2i16_v2i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x0000000000ffff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -428,14 +523,14 @@ define i32 @add_v16i8_v16i32_zext(<16 x i8> %x) {
; CHECK-SD-BASE-NEXT: fmov w0, s0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_zext:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v1.16b, #1
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: add_v16i8_v16i32_zext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.16b, #1
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_zext:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -467,14 +562,14 @@ define i32 @add_v16i8_v16i32_sext(<16 x i8> %x) {
; CHECK-SD-BASE-NEXT: fmov w0, s0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_sext:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v1.16b, #1
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: add_v16i8_v16i32_sext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.16b, #1
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_sext:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -519,6 +614,15 @@ define i32 @add_v8i8_v8i32_zext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.8b, #1
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v1.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -550,6 +654,15 @@ define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.8b, #1
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -581,6 +694,15 @@ define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -625,6 +747,15 @@ define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: uxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i16_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: uxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -654,6 +785,15 @@ define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i16_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -682,6 +822,14 @@ define zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: uxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i16_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: uxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -710,6 +858,14 @@ define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i16_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -735,6 +891,13 @@ define zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: uxtb w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i8:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv b0, v0.16b
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: uxtb w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
ret i8 %z
@@ -801,6 +964,29 @@ define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v7.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -868,6 +1054,29 @@ define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v5.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v7.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -912,6 +1121,20 @@ define i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -956,6 +1179,20 @@ define i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -991,6 +1228,19 @@ define i64 @add_v4i8_v4i64_zext(<4 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: and v2.16b, v2.16b, v1.16b
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1036,6 +1286,19 @@ define i64 @add_v4i8_v4i64_sext(<4 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v1.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: addp d0, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1069,6 +1332,15 @@ define i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
; CHECK-GI-BASE-NEXT: addp d0, v0.2d
; CHECK-GI-BASE-NEXT: fmov x0, d0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v2i8_v2i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x0, d0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -1137,6 +1409,15 @@ define i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1167,6 +1448,15 @@ define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1227,6 +1517,15 @@ define i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: add w0, w8, w0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w0, w8, w0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1257,6 +1556,15 @@ define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: add w0, w8, w0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w0, w8, w0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1318,6 +1626,14 @@ define zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i16_acc:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
%r = add i16 %z, %a
@@ -1362,6 +1678,20 @@ define i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -1407,6 +1737,20 @@ define i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -1440,6 +1784,16 @@ define i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1473,6 +1827,16 @@ define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -1510,6 +1874,16 @@ define i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x0000000000ffff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -1547,15 +1921,15 @@ define i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) {
; CHECK-SD-BASE-NEXT: add w0, w8, w0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_acc_zext:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v1.16b, #1
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w8, s0
-; CHECK-SD-DOT-NEXT: add w0, w8, w0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.16b, #1
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w8, s0
+; CHECK-DOT-NEXT: add w0, w8, w0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_acc_zext:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -1590,15 +1964,15 @@ define i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) {
; CHECK-SD-BASE-NEXT: add w0, w8, w0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: add_v16i8_v16i32_acc_sext:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v1.16b, #1
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w8, s0
-; CHECK-SD-DOT-NEXT: add w0, w8, w0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.16b, #1
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w8, s0
+; CHECK-DOT-NEXT: add w0, w8, w0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: add_v16i8_v16i32_acc_sext:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -1648,6 +2022,16 @@ define i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: add w0, w8, w0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.8b, #1
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v1.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w0, w8, w0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1683,6 +2067,16 @@ define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: add w0, w8, w0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.8b, #1
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w0, w8, w0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -1718,6 +2112,16 @@ define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
; CHECK-GI-BASE-NEXT: fmov w8, s0
; CHECK-GI-BASE-NEXT: add w0, w8, w0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w0, w8, w0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -1768,6 +2172,16 @@ define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1801,6 +2215,16 @@ define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1835,6 +2259,15 @@ define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -1869,6 +2302,15 @@ define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxth
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -1900,6 +2342,14 @@ define zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) {
; CHECK-GI-BASE-NEXT: add w8, w0, w8, uxtb
; CHECK-GI-BASE-NEXT: and w0, w8, #0xff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i8_acc:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv b0, v0.16b
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: add w8, w0, w8, uxtb
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xff
+; CHECK-GI-DOT-NEXT: ret
entry:
%z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
%r = add i8 %z, %a
@@ -1970,6 +2420,30 @@ define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v7.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -2041,6 +2515,30 @@ define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v5.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v7.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v5.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v7.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -2089,6 +2587,21 @@ define i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2137,6 +2650,21 @@ define i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v2.2d, v1.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v3.2d, v0.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2176,6 +2704,20 @@ define i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: and v2.16b, v2.16b, v1.16b
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2225,6 +2767,20 @@ define i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: ssra v1.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: addp d0, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2262,6 +2818,16 @@ define i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
; CHECK-GI-BASE-NEXT: fmov x8, d0
; CHECK-GI-BASE-NEXT: add x0, x8, x0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v1.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: add x0, x8, x0
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2322,6 +2888,15 @@ define i32 @add_pair_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i32_v4i32:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
%z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
@@ -2358,6 +2933,19 @@ define i64 @add_pair_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i32> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2396,6 +2984,19 @@ define i64 @add_pair_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i32> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -2430,6 +3031,17 @@ define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i32> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2464,6 +3076,17 @@ define i64 @add_pair_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <2 x i32> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -2502,6 +3125,19 @@ define i32 @add_pair_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-DOT-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -2540,6 +3176,19 @@ define i32 @add_pair_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-GI-DOT-NEXT: saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -2574,6 +3223,17 @@ define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -2608,6 +3268,17 @@ define i32 @add_pair_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -2628,13 +3299,13 @@ define i32 @test_udot_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: test_udot_v8i8:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: udot v2.2s, v1.8b, v0.8b
-; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: test_udot_v8i8:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v2.2s, v1.8b, v0.8b
+; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
entry:
%0 = zext <8 x i8> %a to <8 x i32>
%1 = zext <8 x i8> %b to <8 x i32>
@@ -2659,13 +3330,13 @@ define i32 @test_udot_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-BASE-NEXT: fmov w0, s0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: test_udot_v16i8:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: udot v2.4s, v1.16b, v0.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: test_udot_v16i8:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v2.4s, v1.16b, v0.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: test_udot_v16i8:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -2700,13 +3371,13 @@ define i32 @test_sdot_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: test_sdot_v8i8:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: sdot v2.2s, v1.8b, v0.8b
-; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: test_sdot_v8i8:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v2.2s, v1.8b, v0.8b
+; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
entry:
%0 = sext <8 x i8> %a to <8 x i32>
%1 = sext <8 x i8> %b to <8 x i32>
@@ -2731,13 +3402,13 @@ define i32 @test_sdot_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-BASE-NEXT: fmov w0, s0
; CHECK-SD-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: test_sdot_v16i8:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: sdot v2.4s, v1.16b, v0.16b
-; CHECK-SD-DOT-NEXT: addv s0, v2.4s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: test_sdot_v16i8:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v2.4s, v1.16b, v0.16b
+; CHECK-DOT-NEXT: addv s0, v2.4s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: test_sdot_v16i8:
; CHECK-GI-BASE: // %bb.0: // %entry
@@ -2785,6 +3456,16 @@ define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v8i16:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: addv h1, v1.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
%z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %y)
@@ -2849,6 +3530,29 @@ define i64 @add_pair_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v5.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v7.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i16> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2915,6 +3619,29 @@ define i64 @add_pair_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v5.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v7.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-DOT-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i16> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -2959,6 +3686,21 @@ define i64 @add_pair_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i16> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -3003,6 +3745,21 @@ define i64 @add_pair_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll v2.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v3.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i16> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -3046,6 +3803,20 @@ define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0x0000000000ffff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i16> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -3094,6 +3865,21 @@ define i64 @add_pair_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: shl v0.2d, v0.2d, #48
+; CHECK-GI-DOT-NEXT: shl v1.2d, v1.2d, #48
+; CHECK-GI-DOT-NEXT: sshr v0.2d, v0.2d, #48
+; CHECK-GI-DOT-NEXT: sshr v1.2d, v1.2d, #48
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <2 x i16> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -3153,6 +3939,20 @@ define i32 @add_pair_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.16b, #1
+; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: udot v4.4s, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: udot v3.4s, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: addv s0, v4.4s
+; CHECK-GI-DOT-NEXT: addv s1, v3.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -3212,6 +4012,20 @@ define i32 @add_pair_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.16b, #1
+; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: sdot v4.4s, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: sdot v3.4s, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: addv s0, v4.4s
+; CHECK-GI-DOT-NEXT: addv s1, v3.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
@@ -3256,6 +4070,20 @@ define i32 @add_pair_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.8b, #1
+; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: udot v4.2s, v0.8b, v2.8b
+; CHECK-GI-DOT-NEXT: udot v3.2s, v1.8b, v2.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v4.2s, v4.2s
+; CHECK-GI-DOT-NEXT: addp v1.2s, v3.2s, v3.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -3300,6 +4128,20 @@ define i32 @add_pair_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.8b, #1
+; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: sdot v4.2s, v0.8b, v2.8b
+; CHECK-GI-DOT-NEXT: sdot v3.2s, v1.8b, v2.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v4.2s, v4.2s
+; CHECK-GI-DOT-NEXT: addp v1.2s, v3.2s, v3.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
@@ -3341,6 +4183,20 @@ define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -3389,6 +4245,21 @@ define i32 @add_pair_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov w9, s1
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-GI-DOT-NEXT: shl v1.4s, v1.4s, #24
+; CHECK-GI-DOT-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-GI-DOT-NEXT: sshr v1.4s, v1.4s, #24
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: addv s1, v1.4s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i32>
%z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -3428,6 +4299,20 @@ define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-DOT-NEXT: uaddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: addv h1, v1.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3467,6 +4352,20 @@ define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: saddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-DOT-NEXT: saddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: addv h1, v1.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3502,6 +4401,18 @@ define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
; CHECK-GI-BASE-NEXT: and w0, w8, #0xffff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: addv h1, v1.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -3537,6 +4448,18 @@ define signext i16 @add_pair_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxth
; CHECK-GI-BASE-NEXT: sxth w0, w8
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: addv h0, v0.8h
+; CHECK-GI-DOT-NEXT: addv h1, v1.8h
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-DOT-NEXT: sxth w0, w8
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -3570,6 +4493,16 @@ define zeroext i8 @add_pair_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: add w8, w9, w8, uxtb
; CHECK-GI-BASE-NEXT: and w0, w8, #0xff
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i8:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addv b0, v0.16b
+; CHECK-GI-DOT-NEXT: addv b1, v1.16b
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: add w8, w9, w8, uxtb
+; CHECK-GI-DOT-NEXT: and w0, w8, #0xff
+; CHECK-GI-DOT-NEXT: ret
entry:
%z1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
%z2 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %y)
@@ -3686,6 +4619,49 @@ define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-GI-DOT-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v6.4s, v3.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v7.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v16.2d, v4.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v17.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v18.2d, v5.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v19.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v20.2d, v6.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v21.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v22.2d, v7.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v23.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v4.2d, v16.2d, v4.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v2.2d, v17.2d, v2.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v5.2d, v18.2d, v5.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v19.2d, v0.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v6.2d, v20.2d, v6.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v3.2d, v21.2d, v3.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v7.2d, v22.2d, v7.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v23.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v2.2d, v4.2d, v2.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v5.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v3.2d, v6.2d, v3.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v7.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -3804,6 +4780,49 @@ define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-DOT-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-GI-DOT-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v5.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v6.4s, v3.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v3.4s, v3.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v7.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v16.2d, v4.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v17.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v18.2d, v5.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v19.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v20.2d, v6.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v21.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v22.2d, v7.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v23.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v4.2d, v16.2d, v4.4s
+; CHECK-GI-DOT-NEXT: saddw2 v2.2d, v17.2d, v2.4s
+; CHECK-GI-DOT-NEXT: saddw2 v5.2d, v18.2d, v5.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v19.2d, v0.4s
+; CHECK-GI-DOT-NEXT: saddw2 v6.2d, v20.2d, v6.4s
+; CHECK-GI-DOT-NEXT: saddw2 v3.2d, v21.2d, v3.4s
+; CHECK-GI-DOT-NEXT: saddw2 v7.2d, v22.2d, v7.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v23.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v2.2d, v4.2d, v2.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v5.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v3.2d, v6.2d, v3.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v7.2d, v1.2d
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
@@ -3876,6 +4895,31 @@ define i64 @add_pair_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v5.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v7.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: uaddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: uaddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <8 x i8> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -3948,6 +4992,31 @@ define i64 @add_pair_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-DOT-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v5.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v6.2d, v3.2s, #0
+; CHECK-GI-DOT-NEXT: sshll v7.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: saddw2 v2.2d, v4.2d, v2.4s
+; CHECK-GI-DOT-NEXT: saddw2 v0.2d, v5.2d, v0.4s
+; CHECK-GI-DOT-NEXT: saddw2 v3.2d, v6.2d, v3.4s
+; CHECK-GI-DOT-NEXT: saddw2 v1.2d, v7.2d, v1.4s
+; CHECK-GI-DOT-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v3.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <8 x i8> %x to <8 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
@@ -4003,6 +5072,28 @@ define i64 @add_pair_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: ushll v4.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-DOT-NEXT: and v3.16b, v3.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v4.16b, v4.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-DOT-NEXT: add v1.2d, v4.2d, v1.2d
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <4 x i8> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -4077,6 +5168,29 @@ define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-DOT-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: shl v2.2d, v2.2d, #56
+; CHECK-GI-DOT-NEXT: shl v3.2d, v3.2d, #56
+; CHECK-GI-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v2.2d, v2.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v3.2d, v3.2d, #56
+; CHECK-GI-DOT-NEXT: ssra v2.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: ssra v3.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: addp d0, v2.2d
+; CHECK-GI-DOT-NEXT: addp d1, v3.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <4 x i8> %x to <4 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
@@ -4120,6 +5234,20 @@ define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-DOT-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = zext <2 x i8> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -4168,6 +5296,21 @@ define i64 @add_pair_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-DOT-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-DOT-NEXT: shl v0.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: shl v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-GI-DOT-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%xx = sext <2 x i8> %x to <2 x i64>
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
@@ -4233,6 +5376,30 @@ define i32 @add_pair_v8i8_v8i32_double_sext_zext(<8 x i8> %ax, <8 x i8> %ay, <8
; CHECK-GI-BASE-NEXT: add w9, w10, w11
; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v4.8b, #1
+; CHECK-GI-DOT-NEXT: movi v5.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v6.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v7.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: movi v16.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: udot v5.2s, v0.8b, v4.8b
+; CHECK-GI-DOT-NEXT: sdot v6.2s, v3.8b, v4.8b
+; CHECK-GI-DOT-NEXT: udot v7.2s, v1.8b, v4.8b
+; CHECK-GI-DOT-NEXT: sdot v16.2s, v2.8b, v4.8b
+; CHECK-GI-DOT-NEXT: addp v0.2s, v5.2s, v5.2s
+; CHECK-GI-DOT-NEXT: addp v3.2s, v6.2s, v6.2s
+; CHECK-GI-DOT-NEXT: addp v1.2s, v7.2s, v7.2s
+; CHECK-GI-DOT-NEXT: addp v2.2s, v16.2s, v16.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s0
+; CHECK-GI-DOT-NEXT: fmov w11, s3
+; CHECK-GI-DOT-NEXT: fmov w9, s1
+; CHECK-GI-DOT-NEXT: fmov w10, s2
+; CHECK-GI-DOT-NEXT: add w8, w8, w9
+; CHECK-GI-DOT-NEXT: add w9, w10, w11
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%axx = zext <8 x i8> %ax to <8 x i32>
%az1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %axx)
@@ -4291,6 +5458,27 @@ define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i1
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: ushll v4.4s, v0.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v6.4s, v2.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-GI-DOT-NEXT: ushll v7.4s, v3.4h, #0
+; CHECK-GI-DOT-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-GI-DOT-NEXT: add v0.4s, v4.4s, v0.4s
+; CHECK-GI-DOT-NEXT: add v1.4s, v5.4s, v1.4s
+; CHECK-GI-DOT-NEXT: add v2.4s, v6.4s, v2.4s
+; CHECK-GI-DOT-NEXT: add v3.4s, v7.4s, v3.4s
+; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-DOT-NEXT: add v1.4s, v2.4s, v3.4s
+; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-DOT-NEXT: addv s0, v0.4s
+; CHECK-GI-DOT-NEXT: fmov w0, s0
+; CHECK-GI-DOT-NEXT: ret
entry:
%axx = zext <8 x i16> %ax to <8 x i32>
%s1h = shufflevector <8 x i32> %axx, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -4338,6 +5526,15 @@ define i64 @add_pair_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
; CHECK-GI-BASE-NEXT: fmov x9, d1
; CHECK-GI-BASE-NEXT: add x0, x8, x9
; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: add_pair_v2i64_v2i64:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: addp d0, v0.2d
+; CHECK-GI-DOT-NEXT: addp d1, v1.2d
+; CHECK-GI-DOT-NEXT: fmov x8, d0
+; CHECK-GI-DOT-NEXT: fmov x9, d1
+; CHECK-GI-DOT-NEXT: add x0, x8, x9
+; CHECK-GI-DOT-NEXT: ret
entry:
%z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
%z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %y)
@@ -4400,61 +5597,61 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
-; CHECK-SD-DOT-LABEL: full:
-; CHECK-SD-DOT: // %bb.0: // %entry
-; CHECK-SD-DOT-NEXT: ldr d0, [x0]
-; CHECK-SD-DOT-NEXT: ldr d1, [x2]
-; CHECK-SD-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
-; CHECK-SD-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-DOT-NEXT: sxtw x8, w3
-; CHECK-SD-DOT-NEXT: sxtw x9, w1
-; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-DOT-NEXT: movi v3.8b, #1
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
-; CHECK-SD-DOT-NEXT: add x11, x2, x8
-; CHECK-SD-DOT-NEXT: add x10, x0, x9
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: add x11, x11, x8
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: add x10, x10, x9
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: add x10, x10, x9
-; CHECK-SD-DOT-NEXT: add x11, x11, x8
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: add x10, x10, x9
-; CHECK-SD-DOT-NEXT: add x11, x11, x8
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: add x10, x10, x9
-; CHECK-SD-DOT-NEXT: add x11, x11, x8
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: add x10, x10, x9
-; CHECK-SD-DOT-NEXT: add x11, x11, x8
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11]
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: ldr d1, [x10, x9]
-; CHECK-SD-DOT-NEXT: ldr d4, [x11, x8]
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-SD-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-SD-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-SD-DOT-NEXT: fmov w0, s0
-; CHECK-SD-DOT-NEXT: ret
+; CHECK-DOT-LABEL: full:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: ldr d0, [x0]
+; CHECK-DOT-NEXT: ldr d1, [x2]
+; CHECK-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
+; CHECK-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-DOT-NEXT: sxtw x8, w3
+; CHECK-DOT-NEXT: sxtw x9, w1
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v3.8b, #1
+; CHECK-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
+; CHECK-DOT-NEXT: add x11, x2, x8
+; CHECK-DOT-NEXT: add x10, x0, x9
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: add x10, x10, x9
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: add x10, x10, x9
+; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: add x10, x10, x9
+; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: add x10, x10, x9
+; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: add x10, x10, x9
+; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: ldr d4, [x11]
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10, x9]
+; CHECK-DOT-NEXT: ldr d4, [x11, x8]
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
entry:
%idx.ext8 = sext i32 %s2 to i64
%idx.ext = sext i32 %s1 to i64
More information about the llvm-commits
mailing list