[llvm] [AArch64][GlobalISel] More type support for G_VECREDUCE_ADD (PR #67433)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 26 07:24:30 PDT 2023
https://github.com/chuongg3 created https://github.com/llvm/llvm-project/pull/67433
G_VECREDUCE_ADD now supports v4i16 and v8i8 vector types as source registers.
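For reference, a minimal IR sketch (function names here are made up for illustration) of the reductions this patch lets GlobalISel select directly; per the new tests below, each now lowers to a single ADDV of the corresponding width (addv h0, v0.4h and addv b0, v0.8b):

    define i16 @red_v4i16(<4 x i16> %v) {
      %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v)
      ret i16 %r
    }

    define i8 @red_v8i8(<8 x i8> %v) {
      %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
      ret i8 %r
    }

    declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
    declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)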
From 6c1f7c208e8c9db35f234a2dd855697b64acb089 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Tue, 26 Sep 2023 10:18:59 +0100
Subject: [PATCH] [AArch64][GlobalISel] More type support for G_VECREDUCE_ADD
G_VECREDUCE_ADD now supports v4i16 and v8i8 vector types as source registers.
---
.../GISel/AArch64InstructionSelector.cpp | 4 ++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 9 +++-
llvm/test/CodeGen/AArch64/aarch64-addv.ll | 42 +++++++++++++------
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 36 ++++++++++++++++
4 files changed, 77 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 60bb820585ac0a2..0bbdebb80590a10 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3559,8 +3559,12 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
unsigned Opc = 0;
if (VecTy == LLT::fixed_vector(16, 8))
Opc = AArch64::ADDVv16i8v;
+ else if (VecTy == LLT::fixed_vector(8, 8))
+ Opc = AArch64::ADDVv8i8v;
else if (VecTy == LLT::fixed_vector(8, 16))
Opc = AArch64::ADDVv8i16v;
+ else if (VecTy == LLT::fixed_vector(4, 16))
+ Opc = AArch64::ADDVv4i16v;
else if (VecTy == LLT::fixed_vector(4, 32))
Opc = AArch64::ADDVv4i32v;
else if (VecTy == LLT::fixed_vector(2, 64))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8d3d94290b0e580..323b81f2175f3fb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -861,8 +861,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.lower();
getActionDefinitionsBuilder(G_VECREDUCE_ADD)
- .legalFor(
- {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
+ .legalFor({{s8, v16s8},
+ {s8, v8s8},
+ {s16, v8s16},
+ {s16, v4s16},
+ {s32, v4s32},
+ {s32, v2s32},
+ {s64, v2s64}})
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.lower();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 6cab309d7c094c2..f1798ccb1e3bbaa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -144,12 +144,21 @@ define i32 @oversized_ADDV_512(ptr %arr) {
}
define i8 @addv_combine_i8(<8 x i8> %a1, <8 x i8> %a2) {
-; CHECK-LABEL: addv_combine_i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: addv b0, v0.8b
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i8:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.8b, v0.8b, v1.8b
+; SDAG-NEXT: addv b0, v0.8b
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i8:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addv b0, v0.8b
+; GISEL-NEXT: addv b1, v1.8b
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: add w0, w9, w8, uxtb
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a1)
%rdx.2 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a2)
@@ -158,12 +167,21 @@ entry:
}
define i16 @addv_combine_i16(<4 x i16> %a1, <4 x i16> %a2) {
-; CHECK-LABEL: addv_combine_i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: addv h0, v0.4h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i16:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4h, v0.4h, v1.4h
+; SDAG-NEXT: addv h0, v0.4h
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addv h0, v0.4h
+; GISEL-NEXT: addv h1, v1.4h
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: add w0, w9, w8, uxth
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a1)
%rdx.2 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 039417784da0bba..4d2ec0ba7107ed9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -2,6 +2,28 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT
+define i32 @addv_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: addv_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
+ ret i32 %arg1
+}
+
+define i16 @addv_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: addv_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
+ ret i16 %arg1
+}
+
define i32 @add_v4i32_v4i32(<4 x i32> %x) {
; CHECK-LABEL: add_v4i32_v4i32:
; CHECK: // %bb.0: // %entry
@@ -13,6 +35,17 @@ entry:
ret i32 %z
}
+define i8 @addv_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: addv_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %arg1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
+ ret i8 %arg1
+}
+
define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
; CHECK-LABEL: add_v4i32_v4i64_zext:
; CHECK: // %bb.0: // %entry
@@ -2261,7 +2294,9 @@ entry:
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
@@ -2269,3 +2304,4 @@ declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)