[llvm] [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (PR #79117)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 28 12:11:59 PST 2024


https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/79117

>From d6764fcdfd5eade0be5a54afaa230389925a2f2e Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Mon, 22 Jan 2024 17:52:05 +0000
Subject: [PATCH 1/4] [AArch64][GlobalISel] Pre-Commit Test Update for Legalize
 G_ABS Large Vector

---
 llvm/test/CodeGen/AArch64/abs.ll           | 363 +++++++++++++++++++++
 llvm/test/CodeGen/AArch64/vecreduce-add.ll |   8 +-
 2 files changed, 370 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/abs.ll

diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
new file mode 100644
index 000000000000000..9cbb1af98c1730c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:         warning: Instruction selection used fallback path for abs_v4i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v32i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v2i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v16i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v8i32
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v4i64
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i32
+
+; ===== Legal Scalars =====
+
+define i8 @abs_i8(i8 %a){
+; CHECK-SD-LABEL: abs_i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sxtb w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sxtb w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #7
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i8 @llvm.abs.i8(i8 %a, i1 0)
+  ret i8 %res
+}
+declare i8 @llvm.abs.i8(i8, i1)
+
+define i16 @abs_i16(i16 %a){
+; CHECK-SD-LABEL: abs_i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sxth w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sxth w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #15
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i16 @llvm.abs.i16(i16 %a, i1 0)
+  ret i16 %res
+}
+declare i16 @llvm.abs.i16(i16, i1)
+
+define i32 @abs_i32(i32 %a){
+; CHECK-SD-LABEL: abs_i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    cneg w0, w0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr w8, w0, #31
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i32 @llvm.abs.i32(i32 %a, i1 0)
+  ret i32 %res
+}
+declare i32 @llvm.abs.i32(i32, i1)
+
+define i64 @abs_i64(i64 %a){
+; CHECK-SD-LABEL: abs_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmp x0, #0
+; CHECK-SD-NEXT:    cneg x0, x0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr x8, x0, #63
+; CHECK-GI-NEXT:    add x9, x0, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i64 @llvm.abs.i64(i64 %a, i1 0)
+  ret i64 %res
+}
+declare i64 @llvm.abs.i64(i64, i1)
+
+define i128 @abs_i128(i128 %a){
+; CHECK-SD-LABEL: abs_i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    asr x8, x1, #63
+; CHECK-SD-NEXT:    eor x9, x0, x8
+; CHECK-SD-NEXT:    eor x10, x1, x8
+; CHECK-SD-NEXT:    subs x0, x9, x8
+; CHECK-SD-NEXT:    sbc x1, x10, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr x8, x1, #63
+; CHECK-GI-NEXT:    adds x9, x0, x8
+; CHECK-GI-NEXT:    adc x10, x1, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    eor x1, x10, x8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i128 @llvm.abs.i128(i128 %a, i1 0)
+  ret i128 %res
+}
+declare i128 @llvm.abs.i128(i128, i1)
+
+; ===== Legal Vector Types =====
+
+define <8 x i8> @abs_v8i8(<8 x i8> %a){
+; CHECK-LABEL: abs_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %a, i1 0)
+  ret <8 x i8> %res
+}
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+define <16 x i8> @abs_v16i8(<16 x i8> %a){
+; CHECK-LABEL: abs_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+define <4 x i16> @abs_v4i16(<4 x i16> %a){
+; CHECK-LABEL: abs_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %a, i1 0)
+  ret <4 x i16> %res
+}
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+
+define <8 x i16> @abs_v8i16(<8 x i16> %a){
+; CHECK-LABEL: abs_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+
+define <2 x i32> @abs_v2i32(<2 x i32> %a){
+; CHECK-LABEL: abs_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %a, i1 0)
+  ret <2 x i32> %res
+}
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+
+define <4 x i32> @abs_v4i32(<4 x i32> %a){
+; CHECK-LABEL: abs_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define <2 x i64> @abs_v2i64(<2 x i64> %a){
+; CHECK-LABEL: abs_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <4 x i8> @abs_v4i8(<4 x i8> %a){
+; CHECK-LABEL: abs_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %a, i1 0)
+  ret <4 x i8> %res
+}
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define <32 x i8> @abs_v32i8(<32 x i8> %a){
+; CHECK-LABEL: abs_v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    abs v1.16b, v1.16b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+
+define <2 x i16> @abs_v2i16(<2 x i16> %a){
+; CHECK-LABEL: abs_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %a, i1 0)
+  ret <2 x i16> %res
+}
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+
+define <16 x i16> @abs_v16i16(<16 x i16> %a){
+; CHECK-LABEL: abs_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    abs v1.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+define <1 x i32> @abs_v1i32(<1 x i32> %a){
+; CHECK-SD-LABEL: abs_v1i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    abs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v1i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    asr w9, w8, #31
+; CHECK-GI-NEXT:    add w8, w8, w9
+; CHECK-GI-NEXT:    eor w8, w8, w9
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)
+  ret <1 x i32> %res
+}
+declare <1 x i32> @llvm.abs.v1i32(<1 x i32>, i1)
+
+define <8 x i32> @abs_v8i32(<8 x i32> %a){
+; CHECK-LABEL: abs_v8i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 0)
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+define <4 x i64> @abs_v4i64(<4 x i64> %a){
+; CHECK-LABEL: abs_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    abs v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i8> @abs_v3i8(<3 x i8> %a){
+; CHECK-LABEL: abs_v3i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.h[1], w1
+; CHECK-NEXT:    mov v0.h[2], w2
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    umov w0, v0.h[0]
+; CHECK-NEXT:    umov w1, v0.h[1]
+; CHECK-NEXT:    umov w2, v0.h[2]
+; CHECK-NEXT:    ret
+entry:
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
+  ret <3 x i8> %res
+}
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+
+define <7 x i8> @abs_v7i8(<7 x i8> %a){
+; CHECK-LABEL: abs_v7i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <7 x i8> @llvm.abs.v7i8(<7 x i8> %a, i1 0)
+  ret <7 x i8> %res
+}
+declare <7 x i8> @llvm.abs.v7i8(<7 x i8>, i1)
+
+define <3 x i16> @abs_v3i16(<3 x i16> %a){
+; CHECK-LABEL: abs_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %a, i1 0)
+  ret <3 x i16> %res
+}
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
+
+define <7 x i16> @abs_v7i16(<7 x i16> %a){
+; CHECK-LABEL: abs_v7i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
+  ret <7 x i16> %res
+}
+declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
+
+define <3 x i32> @abs_v3i32(<3 x i32> %a){
+; CHECK-LABEL: abs_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
+  ret <3 x i32> %res
+}
+declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index ad82d2e7955c27d..56fbb4f2a2b4d15 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4,7 +4,13 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
 
-; CHECK-GI-BASE:        warning: Instruction selection used fallback path for full
+; CHECK-GI-BASE:        warning: Instruction selection used fallback path for test_udot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_udot_v48i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v48i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for full
+
+; CHECK-GI-DOT:         warning: Instruction selection used fallback path for full
 
 define i32 @addv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: addv_v2i32:

>From fc45fe03d9677d54f58afb7f5d981b8c50e57a1d Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Mon, 22 Jan 2024 17:58:48 +0000
Subject: [PATCH 2/4] [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller
 Vectors

Legalize G_ABS for vectors whose element size is legal but whose overall width is not (too few or too many elements).
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   1 +
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   7 +-
 llvm/test/CodeGen/AArch64/abs.ll              |  49 ++--
 llvm/test/CodeGen/AArch64/vecreduce-add.ll    | 210 ++++++++----------
 4 files changed, 134 insertions(+), 133 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6c06afa0979e5bb..464ff0864d146ff 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5254,6 +5254,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_BSWAP:
   case TargetOpcode::G_FCANONICALIZE:
   case TargetOpcode::G_SEXT_INREG:
+  case TargetOpcode::G_ABS:
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 500db122ca50e75..c88396d94788e47 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -988,8 +988,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   if (HasCSSC)
     ABSActions
         .legalFor({s32, s64});
-  ABSActions
-      .legalFor(PackedVectorAllTypeList)
+  ABSActions.legalFor(PackedVectorAllTypeList)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
       .lowerIf(isScalar(0));
 
   // For fadd reductions we have pairwise operations available. We treat the
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 9cbb1af98c1730c..e34119f3f2b0ddb 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -3,14 +3,7 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; CHECK-GI:         warning: Instruction selection used fallback path for abs_v4i8
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v32i8
 ; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v2i16
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v16i16
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v8i32
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v4i64
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i8
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i8
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i16
 ; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i16
 ; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i32
 
@@ -300,18 +293,36 @@ declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
 ; ===== Vectors with Non-Pow 2 Widths =====
 
 define <3 x i8> @abs_v3i8(<3 x i8> %a){
-; CHECK-LABEL: abs_v3i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    mov v0.h[1], w1
-; CHECK-NEXT:    mov v0.h[2], w2
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
-; CHECK-NEXT:    abs v0.4h, v0.4h
-; CHECK-NEXT:    umov w0, v0.h[0]
-; CHECK-NEXT:    umov w1, v0.h[1]
-; CHECK-NEXT:    umov w2, v0.h[2]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abs_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    abs v0.4h, v0.4h
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s1, w1
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    fmov s1, w2
+; CHECK-GI-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT:    mov v0.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[4], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[5], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[6], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[7], v0.b[0]
+; CHECK-GI-NEXT:    abs v0.8b, v0.8b
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
 entry:
   %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
   ret <3 x i8> %res
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 56fbb4f2a2b4d15..1531154b8a03c2e 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -8,9 +8,6 @@
 ; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_udot_v48i8
 ; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v24i8
 ; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v48i8
-; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for full
-
-; CHECK-GI-DOT:         warning: Instruction selection used fallback path for full
 
 define i32 @addv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: addv_v2i32:
@@ -5183,115 +5180,104 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-DOT-NEXT:    fmov w0, s0
 ; CHECK-SD-DOT-NEXT:    ret
 ;
-; CHECK-GI-BASE-LABEL: full:
-; CHECK-GI-BASE:       // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT:    ldr d0, [x2]
-; CHECK-GI-BASE-NEXT:    ldr d1, [x0]
-; CHECK-GI-BASE-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-BASE-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-BASE-NEXT:    sxtw x8, w3
-; CHECK-GI-BASE-NEXT:    sxtw x9, w1
-; CHECK-GI-BASE-NEXT:    uabdl v0.8h, v1.8b, v0.8b
-; CHECK-GI-BASE-NEXT:    add x11, x2, x8
-; CHECK-GI-BASE-NEXT:    add x10, x0, x9
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uaddlp v0.4s, v0.8h
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11, x8]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    addv s0, v0.4s
-; CHECK-GI-BASE-NEXT:    fmov w0, s0
-; CHECK-GI-BASE-NEXT:    ret
-;
-; CHECK-GI-DOT-LABEL: full:
-; CHECK-GI-DOT:       // %bb.0: // %entry
-; CHECK-GI-DOT-NEXT:    ldr d0, [x0]
-; CHECK-GI-DOT-NEXT:    ldr d1, [x2]
-; CHECK-GI-DOT-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-DOT-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-DOT-NEXT:    sxtw x8, w3
-; CHECK-GI-DOT-NEXT:    sxtw x9, w1
-; CHECK-GI-DOT-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-GI-DOT-NEXT:    movi v3.8b, #1
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v0.8b, v1.8b
-; CHECK-GI-DOT-NEXT:    add x11, x2, x8
-; CHECK-GI-DOT-NEXT:    add x10, x0, x9
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11, x8]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
-; CHECK-GI-DOT-NEXT:    fmov w0, s0
-; CHECK-GI-DOT-NEXT:    ret
+; CHECK-GI-LABEL: full:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-GI-NEXT:    sxtw x8, w1
+; CHECK-GI-NEXT:    sxtw x9, w3
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    ldr d1, [x2]
+; CHECK-GI-NEXT:    add x10, x0, x8
+; CHECK-GI-NEXT:    add x11, x2, x9
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ldr d2, [x10]
+; CHECK-GI-NEXT:    ldr d3, [x11]
+; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT:    ushll v3.8h, v3.8b, #0
+; CHECK-GI-NEXT:    ldr d4, [x10]
+; CHECK-GI-NEXT:    ldr d5, [x11]
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v6.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    uabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ldr d1, [x10]
+; CHECK-GI-NEXT:    ushll v4.8h, v4.8b, #0
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    ldr d7, [x11]
+; CHECK-GI-NEXT:    uabdl v16.4s, v2.4h, v3.4h
+; CHECK-GI-NEXT:    uabdl2 v2.4s, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ushll v7.8h, v7.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v1.4s, v4.4h, v5.4h
+; CHECK-GI-NEXT:    uabdl2 v4.4s, v4.8h, v5.8h
+; CHECK-GI-NEXT:    ldr d5, [x10]
+; CHECK-GI-NEXT:    add v2.4s, v16.4s, v2.4s
+; CHECK-GI-NEXT:    ldr d16, [x11]
+; CHECK-GI-NEXT:    add v0.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT:    uabdl v6.4s, v3.4h, v7.4h
+; CHECK-GI-NEXT:    uabdl2 v3.4s, v3.8h, v7.8h
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    ushll v7.8h, v16.8b, #0
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ldr d16, [x10]
+; CHECK-GI-NEXT:    ldr d17, [x11]
+; CHECK-GI-NEXT:    add v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    add v3.4s, v6.4s, v3.4s
+; CHECK-GI-NEXT:    ushll v16.8h, v16.8b, #0
+; CHECK-GI-NEXT:    ushll v17.8h, v17.8b, #0
+; CHECK-GI-NEXT:    uabdl v22.4s, v5.4h, v7.4h
+; CHECK-GI-NEXT:    uabdl2 v5.4s, v5.8h, v7.8h
+; CHECK-GI-NEXT:    ldr d18, [x10]
+; CHECK-GI-NEXT:    ldr d19, [x11]
+; CHECK-GI-NEXT:    addv s0, v0.4s
+; CHECK-GI-NEXT:    addv s2, v2.4s
+; CHECK-GI-NEXT:    addv s1, v1.4s
+; CHECK-GI-NEXT:    ushll v18.8h, v18.8b, #0
+; CHECK-GI-NEXT:    ushll v19.8h, v19.8b, #0
+; CHECK-GI-NEXT:    uabdl v4.4s, v16.4h, v17.4h
+; CHECK-GI-NEXT:    uabdl2 v16.4s, v16.8h, v17.8h
+; CHECK-GI-NEXT:    add v5.4s, v22.4s, v5.4s
+; CHECK-GI-NEXT:    ldr d20, [x10, x8]
+; CHECK-GI-NEXT:    ldr d21, [x11, x9]
+; CHECK-GI-NEXT:    addv s3, v3.4s
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    fmov w9, s0
+; CHECK-GI-NEXT:    ushll v7.8h, v20.8b, #0
+; CHECK-GI-NEXT:    ushll v20.8h, v21.8b, #0
+; CHECK-GI-NEXT:    uabdl v6.4s, v18.4h, v19.4h
+; CHECK-GI-NEXT:    uabdl2 v17.4s, v18.8h, v19.8h
+; CHECK-GI-NEXT:    add v4.4s, v4.4s, v16.4s
+; CHECK-GI-NEXT:    addv s5, v5.4s
+; CHECK-GI-NEXT:    fmov w10, s1
+; CHECK-GI-NEXT:    add w8, w8, w9
+; CHECK-GI-NEXT:    fmov w9, s3
+; CHECK-GI-NEXT:    uabdl v18.4s, v7.4h, v20.4h
+; CHECK-GI-NEXT:    uabdl2 v7.4s, v7.8h, v20.8h
+; CHECK-GI-NEXT:    add v6.4s, v6.4s, v17.4s
+; CHECK-GI-NEXT:    add w8, w10, w8
+; CHECK-GI-NEXT:    addv s0, v4.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s5
+; CHECK-GI-NEXT:    add v7.4s, v18.4s, v7.4s
+; CHECK-GI-NEXT:    addv s1, v6.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s0
+; CHECK-GI-NEXT:    addv s2, v7.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s2
+; CHECK-GI-NEXT:    add w0, w9, w8
+; CHECK-GI-NEXT:    ret
 entry:
   %idx.ext8 = sext i32 %s2 to i64
   %idx.ext = sext i32 %s1 to i64

>From 0a2704e77661dfc68d2b2e2e15865efda93faa98 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Thu, 25 Jan 2024 14:59:12 +0000
Subject: [PATCH 3/4] fixup! [AArch64][GlobalISel] Legalize G_ABS for
 Larger/Smaller Vectors

---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  2 +-
 llvm/test/CodeGen/AArch64/abs.ll              | 49 +++++++++++++++----
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c88396d94788e47..7fbb4a976e75ff3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -993,7 +993,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
-      .lowerIf(isScalar(0));
+      .lower();
 
   // For fadd reductions we have pairwise operations available. We treat the
   // usual legal types as legal and handle the lowering to pairwise instructions
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index e34119f3f2b0ddb..1318157fadc7a63 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -4,8 +4,6 @@
 
 ; CHECK-GI:         warning: Instruction selection used fallback path for abs_v4i8
 ; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v2i16
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i16
-; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i32
 
 ; ===== Legal Scalars =====
 
@@ -352,10 +350,28 @@ entry:
 declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 
 define <7 x i16> @abs_v7i16(<7 x i16> %a){
-; CHECK-LABEL: abs_v7i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    abs v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abs_v7i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    abs v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v7i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #15 // =0xf
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v2.16b, v1.16b
+; CHECK-GI-NEXT:    mov v2.h[1], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[2], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[3], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[4], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[5], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[6], v1.h[0]
+; CHECK-GI-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-GI-NEXT:    neg v1.8h, v2.8h
+; CHECK-GI-NEXT:    sshl v1.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
   ret <7 x i16> %res
@@ -363,10 +379,23 @@ entry:
 declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
 
 define <3 x i32> @abs_v3i32(<3 x i32> %a){
-; CHECK-LABEL: abs_v3i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abs_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.s[1], w8
+; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    neg v1.4s, v1.4s
+; CHECK-GI-NEXT:    sshl v1.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
   ret <3 x i32> %res

>From ae9048fc958bc3bb1bf2efedc24a0807a91db9b3 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 26 Jan 2024 14:11:31 +0000
Subject: [PATCH 4/4] fixup! fixup! [AArch64][GlobalISel] Legalize G_ABS for
 Larger/Smaller Vectors

---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  1 +
 llvm/test/CodeGen/AArch64/abs.ll              | 47 ++++---------------
 2 files changed, 9 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7fbb4a976e75ff3..7220efd807c2810 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -993,6 +993,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
+      .moreElementsToNextPow2(0)
       .lower();
 
   // For fadd reductions we have pairwise operations available. We treat the
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 1318157fadc7a63..934aae9ec74c037 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -350,28 +350,10 @@ entry:
 declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 
 define <7 x i16> @abs_v7i16(<7 x i16> %a){
-; CHECK-SD-LABEL: abs_v7i16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    abs v0.8h, v0.8h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: abs_v7i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov w8, #15 // =0xf
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    mov v2.h[1], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[2], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[3], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[4], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[5], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[6], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[7], v0.h[0]
-; CHECK-GI-NEXT:    neg v1.8h, v2.8h
-; CHECK-GI-NEXT:    sshl v1.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT:    add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: abs_v7i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
 entry:
   %res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
   ret <7 x i16> %res
@@ -379,23 +361,10 @@ entry:
 declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
 
 define <3 x i32> @abs_v3i32(<3 x i32> %a){
-; CHECK-SD-LABEL: abs_v3i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    abs v0.4s, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: abs_v3i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    mov v1.s[1], w8
-; CHECK-GI-NEXT:    mov v1.s[2], w8
-; CHECK-GI-NEXT:    mov v1.s[3], w8
-; CHECK-GI-NEXT:    neg v1.4s, v1.4s
-; CHECK-GI-NEXT:    sshl v1.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: abs_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
   ret <3 x i32> %res



More information about the llvm-commits mailing list