[llvm] [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (PR #79117)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 02:43:34 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: None (chuongg3)

<details>
<summary>Changes</summary>

Legalize G_ABS for larger- and smaller-width vectors with legal element sizes.

GlobalISel falls back for the smaller-width vector tests because it is unable to legalize G_ANYEXT for smaller-width vectors.



---

Patch is 21.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79117.diff


4 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+1) 
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+4) 
- (added) llvm/test/CodeGen/AArch64/abs.ll (+350) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+102-110) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2cf319109273..60ceecb1ada83b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5245,6 +5245,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_BSWAP:
   case TargetOpcode::G_FCANONICALIZE:
   case TargetOpcode::G_SEXT_INREG:
+  case TargetOpcode::G_ABS:
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index fd69a7d6c33d03..e83cc06e51a1ae 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1001,6 +1001,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         .legalFor({s32, s64});
   ABSActions
       .legalFor(PackedVectorAllTypeList)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
       .lowerIf(isScalar(0));
 
   // For fadd reductions we have pairwise operations available. We treat the
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
new file mode 100644
index 00000000000000..0f9048e72c9739
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:         warning: Instruction selection used fallback path for abs_v4i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v2i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v7i16
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v3i32
+
+; ===== Legal Scalars =====
+
+define i8 @abs_i8(i8 %0, i8 %1, i8 %2){
+; CHECK-SD-LABEL: abs_i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sxtb w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sxtb w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #7
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+    %4 = call i8 @llvm.abs.i8(i8 %0, i1 0)
+    ret i8 %4
+}
+declare i8 @llvm.abs.i8(i8, i1)
+
+define i16 @abs_i16(i16 %0, i16 %1, i16 %2){
+; CHECK-SD-LABEL: abs_i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sxth w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sxth w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #15
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+    %4 = call i16 @llvm.abs.i16(i16 %0, i1 0)
+    ret i16 %4
+}
+declare i16 @llvm.abs.i16(i16, i1)
+
+define i32 @abs_i32(i32 %0, i32 %1, i32 %2){
+; CHECK-SD-LABEL: abs_i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    cneg w0, w0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    asr w8, w0, #31
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+    %4 = call i32 @llvm.abs.i32(i32 %0, i1 0)
+    ret i32 %4
+}
+declare i32 @llvm.abs.i32(i32, i1)
+
+define i64 @abs_i64(i64 %0, i64 %1, i64 %2){
+; CHECK-SD-LABEL: abs_i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmp x0, #0
+; CHECK-SD-NEXT:    cneg x0, x0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    asr x8, x0, #63
+; CHECK-GI-NEXT:    add x9, x0, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    ret
+    %4 = call i64 @llvm.abs.i64(i64 %0, i1 0)
+    ret i64 %4
+}
+declare i64 @llvm.abs.i64(i64, i1)
+
+define i128 @abs_i128(i128 %0, i128 %1, i128 %2){
+; CHECK-SD-LABEL: abs_i128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    asr x8, x1, #63
+; CHECK-SD-NEXT:    eor x9, x0, x8
+; CHECK-SD-NEXT:    eor x10, x1, x8
+; CHECK-SD-NEXT:    subs x0, x9, x8
+; CHECK-SD-NEXT:    sbc x1, x10, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    asr x8, x1, #63
+; CHECK-GI-NEXT:    adds x9, x0, x8
+; CHECK-GI-NEXT:    adc x10, x1, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    eor x1, x10, x8
+; CHECK-GI-NEXT:    ret
+    %4 = call i128 @llvm.abs.i128(i128 %0, i1 0)
+    ret i128 %4
+}
+declare i128 @llvm.abs.i128(i128, i1)
+
+; ===== Legal Vector Types =====
+
+define <8 x i8> @abs_v8i8(<8 x i8> %0){
+; CHECK-LABEL: abs_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+    %3 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %0, i1 0)
+    ret <8 x i8> %3
+}
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+define <16 x i8> @abs_v16i8(<16 x i8> %0){
+; CHECK-LABEL: abs_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    ret
+    %3 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %0, i1 0)
+    ret <16 x i8> %3
+}
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+define <4 x i16> @abs_v4i16(<4 x i16> %0){
+; CHECK-LABEL: abs_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+    %3 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %0, i1 0)
+    ret <4 x i16> %3
+}
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+
+define <8 x i16> @abs_v8i16(<8 x i16> %0){
+; CHECK-LABEL: abs_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+    %3 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %0, i1 0)
+    ret <8 x i16> %3
+}
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+
+define <2 x i32> @abs_v2i32(<2 x i32> %0){
+; CHECK-LABEL: abs_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+    %3 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %0, i1 0)
+    ret <2 x i32> %3
+}
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+
+define <4 x i32> @abs_v4i32(<4 x i32> %0){
+; CHECK-LABEL: abs_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+    %3 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %0, i1 0)
+    ret <4 x i32> %3
+}
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define <2 x i64> @abs_v2i64(<2 x i64> %0){
+; CHECK-LABEL: abs_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+    %3 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %0, i1 0)
+    ret <2 x i64> %3
+}
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <4 x i8> @abs_v4i8(<4 x i8> %0){
+; CHECK-LABEL: abs_v4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+    %3 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %0, i1 0)
+    ret <4 x i8> %3
+}
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define <32 x i8> @abs_v32i8(<32 x i8> %0){
+; CHECK-LABEL: abs_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    abs v1.16b, v1.16b
+; CHECK-NEXT:    ret
+    %3 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %0, i1 0)
+    ret <32 x i8> %3
+}
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+
+define <2 x i16> @abs_v2i16(<2 x i16> %0){
+; CHECK-LABEL: abs_v2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+    %3 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %0, i1 0)
+    ret <2 x i16> %3
+}
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+
+define <16 x i16> @abs_v16i16(<16 x i16> %0){
+; CHECK-LABEL: abs_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    abs v1.8h, v1.8h
+; CHECK-NEXT:    ret
+    %3 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %0, i1 0)
+    ret <16 x i16> %3
+}
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+define <1 x i32> @abs_v1i32(<1 x i32> %0){
+; CHECK-SD-LABEL: abs_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    abs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    asr w9, w8, #31
+; CHECK-GI-NEXT:    add w8, w8, w9
+; CHECK-GI-NEXT:    eor w8, w8, w9
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+    %3 = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %0, i1 0)
+    ret <1 x i32> %3
+}
+declare <1 x i32> @llvm.abs.v1i32(<1 x i32>, i1)
+
+define <8 x i32> @abs_v8i32(<8 x i32> %0){
+; CHECK-LABEL: abs_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    ret
+    %3 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %0, i1 0)
+    ret <8 x i32> %3
+}
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+define <4 x i64> @abs_v4i64(<4 x i64> %0){
+; CHECK-LABEL: abs_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    abs v1.2d, v1.2d
+; CHECK-NEXT:    ret
+    %3 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %0, i1 0)
+    ret <4 x i64> %3
+}
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i8> @abs_v3i8(<3 x i8> %0){
+; CHECK-SD-LABEL: abs_v3i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    abs v0.4h, v0.4h
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v3i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s1, w1
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    fmov s1, w2
+; CHECK-GI-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT:    mov v0.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[4], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[5], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[6], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[7], v0.b[0]
+; CHECK-GI-NEXT:    abs v0.8b, v0.8b
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
+    %3 = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %0, i1 0)
+    ret <3 x i8> %3
+}
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+
+define <7 x i8> @abs_v7i8(<7 x i8> %0){
+; CHECK-LABEL: abs_v7i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+    %3 = call <7 x i8> @llvm.abs.v7i8(<7 x i8> %0, i1 0)
+    ret <7 x i8> %3
+}
+declare <7 x i8> @llvm.abs.v7i8(<7 x i8>, i1)
+
+define <3 x i16> @abs_v3i16(<3 x i16> %0){
+; CHECK-LABEL: abs_v3i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+    %3 = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %0, i1 0)
+    ret <3 x i16> %3
+}
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
+
+define <7 x i16> @abs_v7i16(<7 x i16> %0){
+; CHECK-LABEL: abs_v7i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+    %3 = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %0, i1 0)
+    ret <7 x i16> %3
+}
+declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
+
+define <3 x i32> @abs_v3i32(<3 x i32> %0){
+; CHECK-LABEL: abs_v3i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+    %3 = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %0, i1 0)
+    ret <3 x i32> %3
+}
+declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index ad82d2e7955c27..1531154b8a03c2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4,7 +4,10 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
 
-; CHECK-GI-BASE:        warning: Instruction selection used fallback path for full
+; CHECK-GI-BASE:        warning: Instruction selection used fallback path for test_udot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_udot_v48i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v48i8
 
 define i32 @addv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: addv_v2i32:
@@ -5177,115 +5180,104 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-DOT-NEXT:    fmov w0, s0
 ; CHECK-SD-DOT-NEXT:    ret
 ;
-; CHECK-GI-BASE-LABEL: full:
-; CHECK-GI-BASE:       // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT:    ldr d0, [x2]
-; CHECK-GI-BASE-NEXT:    ldr d1, [x0]
-; CHECK-GI-BASE-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-BASE-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-BASE-NEXT:    sxtw x8, w3
-; CHECK-GI-BASE-NEXT:    sxtw x9, w1
-; CHECK-GI-BASE-NEXT:    uabdl v0.8h, v1.8b, v0.8b
-; CHECK-GI-BASE-NEXT:    add x11, x2, x8
-; CHECK-GI-BASE-NEXT:    add x10, x0, x9
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uaddlp v0.4s, v0.8h
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11, x8]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    addv s0, v0.4s
-; CHECK-GI-BASE-NEXT:    fmov w0, s0
-; CHECK-GI-BASE-NEXT:    ret
-;
-; CHECK-GI-DOT-LABEL: full:
-; CHECK-GI-DOT:       // %bb.0: // %entry
-; CHECK-GI-DOT-NEXT:    ldr d0, [x0]
-; CHECK-GI-DOT-NEXT:    ldr d1, [x2]
-; CHECK-GI-DOT-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-DOT-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-DOT-NEXT:    sxtw x8, w3
-; CHECK-GI-DOT-NEXT:    sxtw x9, w1
-; CHECK-GI-DOT-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-GI-DOT-NEXT:    movi v3.8b, #1
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v0.8b, v1.8b
-; CHECK-GI-DOT-NEXT:    add x11, x2, x8
-; CHECK-GI-DOT-NEXT:    add x10, x0, x9
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11, x8]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
-; CHECK-GI-DOT-NEXT:    fmov w0, s0
-; CHECK-GI-DOT-NEXT:    ret
+; CHECK-GI-LABEL: full:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-GI-NEXT:    sxtw x8, w1
+; CHECK-GI-NEXT:    sxtw x9, w3
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    ldr d1, [x2]
+; CHECK-GI-NEXT:    add x10, x0, x8
+; CHECK-GI-NEXT:    add x11, x2, x9
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ldr d2, [x10]
+; CHECK-GI-NEXT:    ldr d3, [x11]
+; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT:    ushll v3.8h, v3.8b, #0
+; CHECK-GI-NEXT:    ldr d4, [x10]
+; CHECK-GI-NEXT:    ldr d5, [x11]
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v6.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    uabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ldr d1, [x10]
+; CHECK-GI-NEXT:    ushll v4.8h, v4.8b, #0
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    ldr d7, [x11]
+; CHECK-GI-NEXT:    uabdl v16.4s, v2.4h, v3.4h
+; CHECK-GI-NEXT:    uabdl2 v2.4s, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ushll v7.8h, v7.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v1.4s, v4.4h, v5.4h
+; CHECK-GI-NEXT:    uabdl2 v4.4s, v4.8h, v5.8h
+; CHECK-GI-NEXT:    ldr d5, [x10]
+; CHECK-GI-NEXT:    add v2.4s, v16.4s, v2.4s
+; CHECK-GI-NEXT:    ldr d16, [x11]
+; CHECK-GI-NEXT:    add v0.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT:    uabdl v6.4s, v3.4h, v7.4h
+; CHECK-GI-NEXT:    uabdl2 v3.4s, v3.8h, v7.8h
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    ushll v7.8h, v16.8b, #0
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ldr d16, [x10]
+; CHECK-GI-NEXT:    ldr d17, [x11]
+; CHECK-GI-NEXT:    add v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    add v3.4s, v6.4s, v...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/79117


More information about the llvm-commits mailing list