[llvm] [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (PR #79117)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 28 12:11:59 PST 2024
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/79117
>From d6764fcdfd5eade0be5a54afaa230389925a2f2e Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Mon, 22 Jan 2024 17:52:05 +0000
Subject: [PATCH 1/4] [AArch64][GlobalISel] Pre-Commit Test Update for Legalize
G_ABS Large Vector
---
llvm/test/CodeGen/AArch64/abs.ll | 363 +++++++++++++++++++++
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 8 +-
2 files changed, 370 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/abs.ll
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
new file mode 100644
index 000000000000000..9cbb1af98c1730c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for abs_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v32i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v2i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v16i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v8i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v4i64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v7i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v7i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i32
+
+; ===== Legal Scalars =====
+
+define i8 @abs_i8(i8 %a){
+; CHECK-SD-LABEL: abs_i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sxtb w8, w0
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cneg w0, w8, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sxtb w8, w0
+; CHECK-GI-NEXT: asr w8, w8, #7
+; CHECK-GI-NEXT: add w9, w0, w8
+; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call i8 @llvm.abs.i8(i8 %a, i1 0)
+ ret i8 %res
+}
+declare i8 @llvm.abs.i8(i8, i1)
+
+define i16 @abs_i16(i16 %a){
+; CHECK-SD-LABEL: abs_i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sxth w8, w0
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cneg w0, w8, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sxth w8, w0
+; CHECK-GI-NEXT: asr w8, w8, #15
+; CHECK-GI-NEXT: add w9, w0, w8
+; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call i16 @llvm.abs.i16(i16 %a, i1 0)
+ ret i16 %res
+}
+declare i16 @llvm.abs.i16(i16, i1)
+
+define i32 @abs_i32(i32 %a){
+; CHECK-SD-LABEL: abs_i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: cneg w0, w0, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: asr w8, w0, #31
+; CHECK-GI-NEXT: add w9, w0, w8
+; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call i32 @llvm.abs.i32(i32 %a, i1 0)
+ ret i32 %res
+}
+declare i32 @llvm.abs.i32(i32, i1)
+
+define i64 @abs_i64(i64 %a){
+; CHECK-SD-LABEL: abs_i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp x0, #0
+; CHECK-SD-NEXT: cneg x0, x0, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: asr x8, x0, #63
+; CHECK-GI-NEXT: add x9, x0, x8
+; CHECK-GI-NEXT: eor x0, x9, x8
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call i64 @llvm.abs.i64(i64 %a, i1 0)
+ ret i64 %res
+}
+declare i64 @llvm.abs.i64(i64, i1)
+
+define i128 @abs_i128(i128 %a){
+; CHECK-SD-LABEL: abs_i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: asr x8, x1, #63
+; CHECK-SD-NEXT: eor x9, x0, x8
+; CHECK-SD-NEXT: eor x10, x1, x8
+; CHECK-SD-NEXT: subs x0, x9, x8
+; CHECK-SD-NEXT: sbc x1, x10, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: asr x8, x1, #63
+; CHECK-GI-NEXT: adds x9, x0, x8
+; CHECK-GI-NEXT: adc x10, x1, x8
+; CHECK-GI-NEXT: eor x0, x9, x8
+; CHECK-GI-NEXT: eor x1, x10, x8
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call i128 @llvm.abs.i128(i128 %a, i1 0)
+ ret i128 %res
+}
+declare i128 @llvm.abs.i128(i128, i1)
+
+; ===== Legal Vector Types =====
+
+define <8 x i8> @abs_v8i8(<8 x i8> %a){
+; CHECK-LABEL: abs_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %a, i1 0)
+ ret <8 x i8> %res
+}
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+define <16 x i8> @abs_v16i8(<16 x i8> %a){
+; CHECK-LABEL: abs_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.16b, v0.16b
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 0)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+define <4 x i16> @abs_v4i16(<4 x i16> %a){
+; CHECK-LABEL: abs_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %a, i1 0)
+ ret <4 x i16> %res
+}
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+
+define <8 x i16> @abs_v8i16(<8 x i16> %a){
+; CHECK-LABEL: abs_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 0)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+
+define <2 x i32> @abs_v2i32(<2 x i32> %a){
+; CHECK-LABEL: abs_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %a, i1 0)
+ ret <2 x i32> %res
+}
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+
+define <4 x i32> @abs_v4i32(<4 x i32> %a){
+; CHECK-LABEL: abs_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 0)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define <2 x i64> @abs_v2i64(<2 x i64> %a){
+; CHECK-LABEL: abs_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 0)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <4 x i8> @abs_v4i8(<4 x i8> %a){
+; CHECK-LABEL: abs_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %a, i1 0)
+ ret <4 x i8> %res
+}
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define <32 x i8> @abs_v32i8(<32 x i8> %a){
+; CHECK-LABEL: abs_v32i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.16b, v0.16b
+; CHECK-NEXT: abs v1.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 0)
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+
+define <2 x i16> @abs_v2i16(<2 x i16> %a){
+; CHECK-LABEL: abs_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-NEXT: sshr v0.2s, v0.2s, #16
+; CHECK-NEXT: abs v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %a, i1 0)
+ ret <2 x i16> %res
+}
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+
+define <16 x i16> @abs_v16i16(<16 x i16> %a){
+; CHECK-LABEL: abs_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8h, v0.8h
+; CHECK-NEXT: abs v1.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 0)
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+define <1 x i32> @abs_v1i32(<1 x i32> %a){
+; CHECK-SD-LABEL: abs_v1i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: abs v0.2s, v0.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_v1i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: asr w9, w8, #31
+; CHECK-GI-NEXT: add w8, w8, w9
+; CHECK-GI-NEXT: eor w8, w8, w9
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)
+ ret <1 x i32> %res
+}
+declare <1 x i32> @llvm.abs.v1i32(<1 x i32>, i1)
+
+define <8 x i32> @abs_v8i32(<8 x i32> %a){
+; CHECK-LABEL: abs_v8i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 0)
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+define <4 x i64> @abs_v4i64(<4 x i64> %a){
+; CHECK-LABEL: abs_v4i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.2d, v0.2d
+; CHECK-NEXT: abs v1.2d, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 0)
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i8> @abs_v3i8(<3 x i8> %a){
+; CHECK-LABEL: abs_v3i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: umov w1, v0.h[1]
+; CHECK-NEXT: umov w2, v0.h[2]
+; CHECK-NEXT: ret
+entry:
+ %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
+ ret <3 x i8> %res
+}
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+
+define <7 x i8> @abs_v7i8(<7 x i8> %a){
+; CHECK-LABEL: abs_v7i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %res = call <7 x i8> @llvm.abs.v7i8(<7 x i8> %a, i1 0)
+ ret <7 x i8> %res
+}
+declare <7 x i8> @llvm.abs.v7i8(<7 x i8>, i1)
+
+define <3 x i16> @abs_v3i16(<3 x i16> %a){
+; CHECK-LABEL: abs_v3i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %a, i1 0)
+ ret <3 x i16> %res
+}
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
+
+define <7 x i16> @abs_v7i16(<7 x i16> %a){
+; CHECK-LABEL: abs_v7i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
+ ret <7 x i16> %res
+}
+declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
+
+define <3 x i32> @abs_v3i32(<3 x i32> %a){
+; CHECK-LABEL: abs_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
+ ret <3 x i32> %res
+}
+declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index ad82d2e7955c27d..56fbb4f2a2b4d15 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4,7 +4,13 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
-; CHECK-GI-BASE: warning: Instruction selection used fallback path for full
+; CHECK-GI-BASE: warning: Instruction selection used fallback path for test_udot_v24i8
+; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_udot_v48i8
+; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v24i8
+; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v48i8
+; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for full
+
+; CHECK-GI-DOT: warning: Instruction selection used fallback path for full
define i32 @addv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: addv_v2i32:
>From fc45fe03d9677d54f58afb7f5d981b8c50e57a1d Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Mon, 22 Jan 2024 17:58:48 +0000
Subject: [PATCH 2/4] [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller
Vectors
Legalize G_ABS for vectors whose total width is not legal but whose element size is legal.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 1 +
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 7 +-
llvm/test/CodeGen/AArch64/abs.ll | 49 ++--
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 210 ++++++++----------
4 files changed, 134 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6c06afa0979e5bb..464ff0864d146ff 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5254,6 +5254,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
+ case TargetOpcode::G_ABS:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 500db122ca50e75..c88396d94788e47 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -988,8 +988,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
if (HasCSSC)
ABSActions
.legalFor({s32, s64});
- ABSActions
- .legalFor(PackedVectorAllTypeList)
+ ABSActions.legalFor(PackedVectorAllTypeList)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
.lowerIf(isScalar(0));
// For fadd reductions we have pairwise operations available. We treat the
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 9cbb1af98c1730c..e34119f3f2b0ddb 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -3,14 +3,7 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for abs_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v32i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v7i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v7i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i32
@@ -300,18 +293,36 @@ declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
; ===== Vectors with Non-Pow 2 Widths =====
define <3 x i8> @abs_v3i8(<3 x i8> %a){
-; CHECK-LABEL: abs_v3i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: mov v0.h[1], w1
-; CHECK-NEXT: mov v0.h[2], w2
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: abs v0.4h, v0.4h
-; CHECK-NEXT: umov w0, v0.h[0]
-; CHECK-NEXT: umov w1, v0.h[1]
-; CHECK-NEXT: umov w2, v0.h[2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abs_v3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: abs v0.4h, v0.4h
+; CHECK-SD-NEXT: umov w0, v0.h[0]
+; CHECK-SD-NEXT: umov w1, v0.h[1]
+; CHECK-SD-NEXT: umov w2, v0.h[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_v3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v0.b[0]
+; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
+; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
+; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
+; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
+; CHECK-GI-NEXT: abs v0.8b, v0.8b
+; CHECK-GI-NEXT: umov w0, v0.b[0]
+; CHECK-GI-NEXT: umov w1, v0.b[1]
+; CHECK-GI-NEXT: umov w2, v0.b[2]
+; CHECK-GI-NEXT: ret
entry:
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
ret <3 x i8> %res
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 56fbb4f2a2b4d15..1531154b8a03c2e 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -8,9 +8,6 @@
; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_udot_v48i8
; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v24i8
; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v48i8
-; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for full
-
-; CHECK-GI-DOT: warning: Instruction selection used fallback path for full
define i32 @addv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: addv_v2i32:
@@ -5183,115 +5180,104 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
; CHECK-SD-DOT-NEXT: fmov w0, s0
; CHECK-SD-DOT-NEXT: ret
;
-; CHECK-GI-BASE-LABEL: full:
-; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: ldr d0, [x2]
-; CHECK-GI-BASE-NEXT: ldr d1, [x0]
-; CHECK-GI-BASE-NEXT: // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-BASE-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-BASE-NEXT: sxtw x8, w3
-; CHECK-GI-BASE-NEXT: sxtw x9, w1
-; CHECK-GI-BASE-NEXT: uabdl v0.8h, v1.8b, v0.8b
-; CHECK-GI-BASE-NEXT: add x11, x2, x8
-; CHECK-GI-BASE-NEXT: add x10, x0, x9
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: add x11, x11, x8
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: add x10, x10, x9
-; CHECK-GI-BASE-NEXT: uaddlp v0.4s, v0.8h
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: add x11, x11, x8
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: add x10, x10, x9
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: add x11, x11, x8
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: add x10, x10, x9
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: add x11, x11, x8
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: add x10, x10, x9
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: add x11, x11, x8
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: add x10, x10, x9
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11]
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10]
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: ldr d2, [x11, x8]
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: ldr d1, [x10, x9]
-; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
-; CHECK-GI-BASE-NEXT: ret
-;
-; CHECK-GI-DOT-LABEL: full:
-; CHECK-GI-DOT: // %bb.0: // %entry
-; CHECK-GI-DOT-NEXT: ldr d0, [x0]
-; CHECK-GI-DOT-NEXT: ldr d1, [x2]
-; CHECK-GI-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-DOT-NEXT: sxtw x8, w3
-; CHECK-GI-DOT-NEXT: sxtw x9, w1
-; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-DOT-NEXT: movi v3.8b, #1
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
-; CHECK-GI-DOT-NEXT: add x11, x2, x8
-; CHECK-GI-DOT-NEXT: add x10, x0, x9
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: add x11, x11, x8
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: add x10, x10, x9
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: add x10, x10, x9
-; CHECK-GI-DOT-NEXT: add x11, x11, x8
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: add x10, x10, x9
-; CHECK-GI-DOT-NEXT: add x11, x11, x8
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: add x10, x10, x9
-; CHECK-GI-DOT-NEXT: add x11, x11, x8
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: add x10, x10, x9
-; CHECK-GI-DOT-NEXT: add x11, x11, x8
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11]
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: ldr d1, [x10, x9]
-; CHECK-GI-DOT-NEXT: ldr d4, [x11, x8]
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
-; CHECK-GI-DOT-NEXT: fmov w0, s0
-; CHECK-GI-DOT-NEXT: ret
+; CHECK-GI-LABEL: full:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT: // kill: def $w3 killed $w3 def $x3
+; CHECK-GI-NEXT: sxtw x8, w1
+; CHECK-GI-NEXT: sxtw x9, w3
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x2]
+; CHECK-GI-NEXT: add x10, x0, x8
+; CHECK-GI-NEXT: add x11, x2, x9
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ldr d2, [x10]
+; CHECK-GI-NEXT: ldr d3, [x11]
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: add x10, x10, x8
+; CHECK-GI-NEXT: add x11, x11, x9
+; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0
+; CHECK-GI-NEXT: ldr d4, [x10]
+; CHECK-GI-NEXT: ldr d5, [x11]
+; CHECK-GI-NEXT: add x10, x10, x8
+; CHECK-GI-NEXT: add x11, x11, x9
+; CHECK-GI-NEXT: uabdl v6.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: uabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT: ldr d1, [x10]
+; CHECK-GI-NEXT: ushll v4.8h, v4.8b, #0
+; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT: ldr d7, [x11]
+; CHECK-GI-NEXT: uabdl v16.4s, v2.4h, v3.4h
+; CHECK-GI-NEXT: uabdl2 v2.4s, v2.8h, v3.8h
+; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: ushll v7.8h, v7.8b, #0
+; CHECK-GI-NEXT: add x10, x10, x8
+; CHECK-GI-NEXT: add x11, x11, x9
+; CHECK-GI-NEXT: uabdl v1.4s, v4.4h, v5.4h
+; CHECK-GI-NEXT: uabdl2 v4.4s, v4.8h, v5.8h
+; CHECK-GI-NEXT: ldr d5, [x10]
+; CHECK-GI-NEXT: add v2.4s, v16.4s, v2.4s
+; CHECK-GI-NEXT: ldr d16, [x11]
+; CHECK-GI-NEXT: add v0.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT: uabdl v6.4s, v3.4h, v7.4h
+; CHECK-GI-NEXT: uabdl2 v3.4s, v3.8h, v7.8h
+; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT: add x10, x10, x8
+; CHECK-GI-NEXT: ushll v7.8h, v16.8b, #0
+; CHECK-GI-NEXT: add x11, x11, x9
+; CHECK-GI-NEXT: ldr d16, [x10]
+; CHECK-GI-NEXT: ldr d17, [x11]
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT: add x10, x10, x8
+; CHECK-GI-NEXT: add x11, x11, x9
+; CHECK-GI-NEXT: add v3.4s, v6.4s, v3.4s
+; CHECK-GI-NEXT: ushll v16.8h, v16.8b, #0
+; CHECK-GI-NEXT: ushll v17.8h, v17.8b, #0
+; CHECK-GI-NEXT: uabdl v22.4s, v5.4h, v7.4h
+; CHECK-GI-NEXT: uabdl2 v5.4s, v5.8h, v7.8h
+; CHECK-GI-NEXT: ldr d18, [x10]
+; CHECK-GI-NEXT: ldr d19, [x11]
+; CHECK-GI-NEXT: addv s0, v0.4s
+; CHECK-GI-NEXT: addv s2, v2.4s
+; CHECK-GI-NEXT: addv s1, v1.4s
+; CHECK-GI-NEXT: ushll v18.8h, v18.8b, #0
+; CHECK-GI-NEXT: ushll v19.8h, v19.8b, #0
+; CHECK-GI-NEXT: uabdl v4.4s, v16.4h, v17.4h
+; CHECK-GI-NEXT: uabdl2 v16.4s, v16.8h, v17.8h
+; CHECK-GI-NEXT: add v5.4s, v22.4s, v5.4s
+; CHECK-GI-NEXT: ldr d20, [x10, x8]
+; CHECK-GI-NEXT: ldr d21, [x11, x9]
+; CHECK-GI-NEXT: addv s3, v3.4s
+; CHECK-GI-NEXT: fmov w8, s2
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: ushll v7.8h, v20.8b, #0
+; CHECK-GI-NEXT: ushll v20.8h, v21.8b, #0
+; CHECK-GI-NEXT: uabdl v6.4s, v18.4h, v19.4h
+; CHECK-GI-NEXT: uabdl2 v17.4s, v18.8h, v19.8h
+; CHECK-GI-NEXT: add v4.4s, v4.4s, v16.4s
+; CHECK-GI-NEXT: addv s5, v5.4s
+; CHECK-GI-NEXT: fmov w10, s1
+; CHECK-GI-NEXT: add w8, w8, w9
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: uabdl v18.4s, v7.4h, v20.4h
+; CHECK-GI-NEXT: uabdl2 v7.4s, v7.8h, v20.8h
+; CHECK-GI-NEXT: add v6.4s, v6.4s, v17.4s
+; CHECK-GI-NEXT: add w8, w10, w8
+; CHECK-GI-NEXT: addv s0, v4.4s
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: fmov w9, s5
+; CHECK-GI-NEXT: add v7.4s, v18.4s, v7.4s
+; CHECK-GI-NEXT: addv s1, v6.4s
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: addv s2, v7.4s
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: fmov w9, s2
+; CHECK-GI-NEXT: add w0, w9, w8
+; CHECK-GI-NEXT: ret
entry:
%idx.ext8 = sext i32 %s2 to i64
%idx.ext = sext i32 %s1 to i64
>From 0a2704e77661dfc68d2b2e2e15865efda93faa98 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Thu, 25 Jan 2024 14:59:12 +0000
Subject: [PATCH 3/4] fixup! [AArch64][GlobalISel] Legalize G_ABS for
Larger/Smaller Vectors
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +-
llvm/test/CodeGen/AArch64/abs.ll | 49 +++++++++++++++----
2 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c88396d94788e47..7fbb4a976e75ff3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -993,7 +993,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
- .lowerIf(isScalar(0));
+ .lower();
// For fadd reductions we have pairwise operations available. We treat the
// usual legal types as legal and handle the lowering to pairwise instructions
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index e34119f3f2b0ddb..1318157fadc7a63 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -4,8 +4,6 @@
; CHECK-GI: warning: Instruction selection used fallback path for abs_v4i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v7i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v3i32
; ===== Legal Scalars =====
@@ -352,10 +350,28 @@ entry:
declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
define <7 x i16> @abs_v7i16(<7 x i16> %a){
-; CHECK-LABEL: abs_v7i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: abs v0.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abs_v7i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: abs v0.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_v7i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #15 // =0xf
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v2.16b, v1.16b
+; CHECK-GI-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[3], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[4], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[5], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[6], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[7], v0.h[0]
+; CHECK-GI-NEXT: neg v1.8h, v2.8h
+; CHECK-GI-NEXT: sshl v1.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: eor v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
entry:
%res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
ret <7 x i16> %res
@@ -363,10 +379,23 @@ entry:
declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
define <3 x i32> @abs_v3i32(<3 x i32> %a){
-; CHECK-LABEL: abs_v3i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abs_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #31 // =0x1f
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: mov v1.s[1], w8
+; CHECK-GI-NEXT: mov v1.s[2], w8
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: sshl v1.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: eor v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
entry:
%res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
ret <3 x i32> %res
>From ae9048fc958bc3bb1bf2efedc24a0807a91db9b3 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 26 Jan 2024 14:11:31 +0000
Subject: [PATCH 4/4] fixup! fixup! [AArch64][GlobalISel] Legalize G_ABS for
Larger/Smaller Vectors
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 +
llvm/test/CodeGen/AArch64/abs.ll | 47 ++++---------------
2 files changed, 9 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7fbb4a976e75ff3..7220efd807c2810 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -993,6 +993,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0)
.lower();
// For fadd reductions we have pairwise operations available. We treat the
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 1318157fadc7a63..934aae9ec74c037 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -350,28 +350,10 @@ entry:
declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
define <7 x i16> @abs_v7i16(<7 x i16> %a){
-; CHECK-SD-LABEL: abs_v7i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: abs v0.8h, v0.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: abs_v7i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #15 // =0xf
-; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: mov v2.16b, v1.16b
-; CHECK-GI-NEXT: mov v2.h[1], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[2], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[3], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[4], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[5], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[6], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NEXT: neg v1.8h, v2.8h
-; CHECK-GI-NEXT: sshl v1.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: eor v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: abs_v7i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.8h, v0.8h
+; CHECK-NEXT: ret
entry:
%res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
ret <7 x i16> %res
@@ -379,23 +361,10 @@ entry:
declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
define <3 x i32> @abs_v3i32(<3 x i32> %a){
-; CHECK-SD-LABEL: abs_v3i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: abs v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: abs_v3i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov w8, #31 // =0x1f
-; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: mov v1.s[1], w8
-; CHECK-GI-NEXT: mov v1.s[2], w8
-; CHECK-GI-NEXT: mov v1.s[3], w8
-; CHECK-GI-NEXT: neg v1.4s, v1.4s
-; CHECK-GI-NEXT: sshl v1.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: eor v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: abs_v3i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
ret <3 x i32> %res
More information about the llvm-commits mailing list