[llvm] [AArch64][GlobalISel] Legalize G_VECREDUCE_ADD bigger types (PR #68202)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 9 03:17:21 PDT 2023
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/68202
>From 57af26de098a1df7df4f15033de6d76da89bce49 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Tue, 3 Oct 2023 16:56:15 +0100
Subject: [PATCH 1/2] [AArch64][GlobalISel] Legalize G_VECREDUCE_ADD bigger
types
Legalize larger i8 and i16 vector types for G_VECREDUCE_ADD
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 200 ++++++++++++++----
2 files changed, 155 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 323b81f2175f3fb..ae2922890e29226 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -870,6 +870,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{s64, v2s64}})
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8)
+ .clampMaxNumElements(1, s8, 16)
.lower();
getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 91ebe8b5ca568fd..a88c930d09e9b17 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -3,13 +3,7 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 -mattr=+dotprod %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for add_v16i8_v16i16_zext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v16i8_v16i16_sext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v16i8_v16i16_acc_zext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v16i8_v16i16_acc_sext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_pair_v16i8_v16i16_zext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_pair_v16i8_v16i16_sext
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for full
+; CHECK-GI: warning: Instruction selection used fallback path for full
define i32 @addv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: addv_v2i32:
@@ -610,12 +604,28 @@ entry:
}
define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
-; CHECK-LABEL: add_v16i8_v16i16_zext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_v16i8_v16i16_zext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: uaddlp v0.8h, v0.16b
+; CHECK-BASE-NEXT: addv h0, v0.8h
+; CHECK-BASE-NEXT: fmov w0, s0
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i16_zext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: uaddlp v0.8h, v0.16b
+; CHECK-DOT-NEXT: addv h0, v0.8h
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_v16i8_v16i16_zext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: uxth w0, w8
+; CHECK-GI-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -623,12 +633,28 @@ entry:
}
define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
-; CHECK-LABEL: add_v16i8_v16i16_sext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: smov w0, v0.h[0]
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_v16i8_v16i16_sext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: saddlp v0.8h, v0.16b
+; CHECK-BASE-NEXT: addv h0, v0.8h
+; CHECK-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i16_sext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: saddlp v0.8h, v0.16b
+; CHECK-DOT-NEXT: addv h0, v0.8h
+; CHECK-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_v16i8_v16i16_sext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: sxth w0, w8
+; CHECK-GI-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1718,13 +1744,31 @@ entry:
}
define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
-; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddlv h0, v0.16b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: add w8, w8, w0
-; CHECK-NEXT: and w0, w8, #0xffff
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: uaddlv h0, v0.16b
+; CHECK-BASE-NEXT: fmov w8, s0
+; CHECK-BASE-NEXT: add w8, w8, w0
+; CHECK-BASE-NEXT: and w0, w8, #0xffff
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: uaddlv h0, v0.16b
+; CHECK-DOT-NEXT: fmov w8, s0
+; CHECK-DOT-NEXT: add w8, w8, w0
+; CHECK-DOT-NEXT: and w0, w8, #0xffff
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-NEXT: and w0, w8, #0xffff
+; CHECK-GI-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -1733,13 +1777,31 @@ entry:
}
define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
-; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlv h0, v0.16b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: add w8, w8, w0
-; CHECK-NEXT: sxth w0, w8
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: saddlv h0, v0.16b
+; CHECK-BASE-NEXT: fmov w8, s0
+; CHECK-BASE-NEXT: add w8, w8, w0
+; CHECK-BASE-NEXT: sxth w0, w8
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: saddlv h0, v0.16b
+; CHECK-DOT-NEXT: fmov w8, s0
+; CHECK-DOT-NEXT: add w8, w8, w0
+; CHECK-DOT-NEXT: sxth w0, w8
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: add w8, w0, w8, uxth
+; CHECK-GI-NEXT: sxth w0, w8
+; CHECK-GI-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3194,13 +3256,35 @@ entry:
}
define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-LABEL: add_pair_v16i8_v16i16_zext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddlp v1.8h, v1.16b
-; CHECK-NEXT: uadalp v1.8h, v0.16b
-; CHECK-NEXT: addv h0, v1.8h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: uaddlp v1.8h, v1.16b
+; CHECK-BASE-NEXT: uadalp v1.8h, v0.16b
+; CHECK-BASE-NEXT: addv h0, v1.8h
+; CHECK-BASE-NEXT: fmov w0, s0
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: uaddlp v1.8h, v1.16b
+; CHECK-DOT-NEXT: uadalp v1.8h, v0.16b
+; CHECK-DOT-NEXT: addv h0, v1.8h
+; CHECK-DOT-NEXT: fmov w0, s0
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: uaddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-NEXT: uaddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: addv h1, v1.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-NEXT: and w0, w8, #0xffff
+; CHECK-GI-NEXT: ret
entry:
%xx = zext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
@@ -3211,13 +3295,35 @@ entry:
}
define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
-; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v1.8h, v1.16b
-; CHECK-NEXT: sadalp v1.8h, v0.16b
-; CHECK-NEXT: addv h0, v1.8h
-; CHECK-NEXT: smov w0, v0.h[0]
-; CHECK-NEXT: ret
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: saddlp v1.8h, v1.16b
+; CHECK-BASE-NEXT: sadalp v1.8h, v0.16b
+; CHECK-BASE-NEXT: addv h0, v1.8h
+; CHECK-BASE-NEXT: smov w0, v0.h[0]
+; CHECK-BASE-NEXT: ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: saddlp v1.8h, v1.16b
+; CHECK-DOT-NEXT: sadalp v1.8h, v0.16b
+; CHECK-DOT-NEXT: addv h0, v1.8h
+; CHECK-DOT-NEXT: smov w0, v0.h[0]
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-GI-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: saddw2 v0.8h, v2.8h, v0.16b
+; CHECK-GI-NEXT: saddw2 v1.8h, v3.8h, v1.16b
+; CHECK-GI-NEXT: addv h0, v0.8h
+; CHECK-GI-NEXT: addv h1, v1.8h
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: add w8, w9, w8, uxth
+; CHECK-GI-NEXT: sxth w0, w8
+; CHECK-GI-NEXT: ret
entry:
%xx = sext <16 x i8> %x to <16 x i16>
%z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
>From d7b5fc7c5c2ea0d2fc06d3ec6f4e78bf60a94bfe Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 6 Oct 2023 08:46:43 +0100
Subject: [PATCH 2/2] fixup! [AArch64][GlobalISel] Legalize G_VECREDUCE_ADD
bigger types
---
llvm/test/CodeGen/AArch64/aarch64-addv.ll | 265 +++++++++++++++++++++-
1 file changed, 253 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index f1798ccb1e3bbaa..7b09a4cc7b8a27b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -3,20 +3,35 @@
; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=generic 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL
; Function Attrs: nounwind readnone
-declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
-declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.add.v3i16(<3 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v3i32(<3 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v3i64(<3 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128>)
+
+; GISEL: warning: Instruction selection used fallback path for addv_v2i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v4i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i16
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i16
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i32
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i64
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i128
-; GISEL-NOT: Instruction selection used fallback path for add_B
-; GISEL-NOT: Instruction selection used fallback path for add_H
-; GISEL-NOT: Instruction selection used fallback path for add_S
-; GISEL-NOT: Instruction selection used fallback path for add_D
-; GISEL-NOT: Instruction selection used fallback path for oversized_ADDV_512
-; GISEL-NOT: Instruction selection used fallback path for addv_combine_i32
-; GISEL-NOT: Instruction selection used fallback path for addv_combine_i64
define i8 @add_B(ptr %arr) {
; CHECK-LABEL: add_B:
@@ -66,7 +81,6 @@ define i64 @add_D(ptr %arr) {
ret i64 %r
}
-declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias nocapture readonly %arg2) {
; SDAG-LABEL: oversized_ADDV_256:
@@ -234,3 +248,230 @@ entry:
%r = add i64 %rdx.1, %rdx.2
ret i64 %r
}
+
+define i8 @addv_v2i8(<2 x i8> %a) {
+; CHECK-LABEL: addv_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a)
+ ret i8 %arg1
+}
+
+define i8 @addv_v3i8(<3 x i8> %a) {
+; CHECK-LABEL: addv_v3i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v0.h[0], w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
+ ret i8 %arg1
+}
+
+define i8 @addv_v4i8(<4 x i8> %a) {
+; CHECK-LABEL: addv_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a)
+ ret i8 %arg1
+}
+
+define i8 @addv_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: addv_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
+ ret i8 %arg1
+}
+
+define i8 @addv_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: addv_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv b0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
+ ret i8 %arg1
+}
+
+define i8 @addv_v32i8(<32 x i8> %a) {
+; CHECK-LABEL: addv_v32i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: addv b0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
+ ret i8 %arg1
+}
+
+define i16 @addv_v2i16(<2 x i16> %a) {
+; CHECK-LABEL: addv_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a)
+ ret i16 %arg1
+}
+
+define i16 @addv_v3i16(<3 x i16> %a) {
+; CHECK-LABEL: addv_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[3], wzr
+; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %a)
+ ret i16 %arg1
+}
+
+define i16 @addv_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: addv_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
+ ret i16 %arg1
+}
+
+define i16 @addv_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: addv_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
+ ret i16 %arg1
+}
+
+define i16 @addv_v16i16(<16 x i16> %a) {
+; CHECK-LABEL: addv_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
+ ret i16 %arg1
+}
+
+define i32 @addv_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: addv_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
+ ret i32 %arg1
+}
+
+define i32 @addv_v3i32(<3 x i32> %a) {
+; CHECK-LABEL: addv_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v0.s[3], wzr
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
+ ret i32 %arg1
+}
+
+define i32 @addv_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: addv_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
+ ret i32 %arg1
+}
+
+define i32 @addv_v8i32(<8 x i32> %a) {
+; CHECK-LABEL: addv_v8i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
+ ret i32 %arg1
+}
+
+define i64 @addv_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: addv_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
+ ret i64 %arg1
+}
+
+define i64 @addv_v3i64(<3 x i64> %a) {
+; CHECK-LABEL: addv_v3i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: mov v2.d[1], xzr
+; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %a)
+ ret i64 %arg1
+}
+
+define i64 @addv_v4i64(<4 x i64> %a) {
+; CHECK-LABEL: addv_v4i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
+ ret i64 %arg1
+}
+
+define i128 @addv_v2i128(<2 x i128> %a) {
+; CHECK-LABEL: addv_v2i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adds x0, x0, x2
+; CHECK-NEXT: adc x1, x1, x3
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
+ ret i128 %arg1
+}
+
More information about the llvm-commits
mailing list