[llvm] 2c552d3 - [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (#79117)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 28 12:21:42 PST 2024


Author: chuongg3
Date: 2024-01-28T20:21:38Z
New Revision: 2c552d319a5f0378c48390524de436265b05b943

URL: https://github.com/llvm/llvm-project/commit/2c552d319a5f0378c48390524de436265b05b943
DIFF: https://github.com/llvm/llvm-project/commit/2c552d319a5f0378c48390524de436265b05b943.diff

LOG: [AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (#79117)

Legalize G_ABS for larger/smaller width vectors with legal element sizes

GlobalISel falls back for the smaller-width vector tests (abs_v4i8 and
abs_v2i16) because it is unable to legalize G_ANYEXT for those
smaller-width vectors.

Added: 
    llvm/test/CodeGen/AArch64/abs.ll

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/vecreduce-add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6c06afa0979e5bb..464ff0864d146ff 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5254,6 +5254,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_BSWAP:
   case TargetOpcode::G_FCANONICALIZE:
   case TargetOpcode::G_SEXT_INREG:
+  case TargetOpcode::G_ABS:
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 500db122ca50e75..7220efd807c2810 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -988,9 +988,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   if (HasCSSC)
     ABSActions
         .legalFor({s32, s64});
-  ABSActions
-      .legalFor(PackedVectorAllTypeList)
-      .lowerIf(isScalar(0));
+  ABSActions.legalFor(PackedVectorAllTypeList)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
+      .moreElementsToNextPow2(0)
+      .lower();
 
   // For fadd reductions we have pairwise operations available. We treat the
   // usual legal types as legal and handle the lowering to pairwise instructions

diff  --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
new file mode 100644
index 000000000000000..934aae9ec74c037
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -0,0 +1,372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:         warning: Instruction selection used fallback path for abs_v4i8
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for abs_v2i16
+
+; ===== Legal Scalars =====
+
+define i8 @abs_i8(i8 %a){
+; CHECK-SD-LABEL: abs_i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sxtb w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sxtb w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #7
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i8 @llvm.abs.i8(i8 %a, i1 0)
+  ret i8 %res
+}
+declare i8 @llvm.abs.i8(i8, i1)
+
+define i16 @abs_i16(i16 %a){
+; CHECK-SD-LABEL: abs_i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sxth w8, w0
+; CHECK-SD-NEXT:    cmp w8, #0
+; CHECK-SD-NEXT:    cneg w0, w8, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sxth w8, w0
+; CHECK-GI-NEXT:    asr w8, w8, #15
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i16 @llvm.abs.i16(i16 %a, i1 0)
+  ret i16 %res
+}
+declare i16 @llvm.abs.i16(i16, i1)
+
+define i32 @abs_i32(i32 %a){
+; CHECK-SD-LABEL: abs_i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    cneg w0, w0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr w8, w0, #31
+; CHECK-GI-NEXT:    add w9, w0, w8
+; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i32 @llvm.abs.i32(i32 %a, i1 0)
+  ret i32 %res
+}
+declare i32 @llvm.abs.i32(i32, i1)
+
+define i64 @abs_i64(i64 %a){
+; CHECK-SD-LABEL: abs_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmp x0, #0
+; CHECK-SD-NEXT:    cneg x0, x0, mi
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr x8, x0, #63
+; CHECK-GI-NEXT:    add x9, x0, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i64 @llvm.abs.i64(i64 %a, i1 0)
+  ret i64 %res
+}
+declare i64 @llvm.abs.i64(i64, i1)
+
+define i128 @abs_i128(i128 %a){
+; CHECK-SD-LABEL: abs_i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    asr x8, x1, #63
+; CHECK-SD-NEXT:    eor x9, x0, x8
+; CHECK-SD-NEXT:    eor x10, x1, x8
+; CHECK-SD-NEXT:    subs x0, x9, x8
+; CHECK-SD-NEXT:    sbc x1, x10, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_i128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    asr x8, x1, #63
+; CHECK-GI-NEXT:    adds x9, x0, x8
+; CHECK-GI-NEXT:    adc x10, x1, x8
+; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    eor x1, x10, x8
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call i128 @llvm.abs.i128(i128 %a, i1 0)
+  ret i128 %res
+}
+declare i128 @llvm.abs.i128(i128, i1)
+
+; ===== Legal Vector Types =====
+
+define <8 x i8> @abs_v8i8(<8 x i8> %a){
+; CHECK-LABEL: abs_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %a, i1 0)
+  ret <8 x i8> %res
+}
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+define <16 x i8> @abs_v16i8(<16 x i8> %a){
+; CHECK-LABEL: abs_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+define <4 x i16> @abs_v4i16(<4 x i16> %a){
+; CHECK-LABEL: abs_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %a, i1 0)
+  ret <4 x i16> %res
+}
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+
+define <8 x i16> @abs_v8i16(<8 x i16> %a){
+; CHECK-LABEL: abs_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+
+define <2 x i32> @abs_v2i32(<2 x i32> %a){
+; CHECK-LABEL: abs_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %a, i1 0)
+  ret <2 x i32> %res
+}
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+
+define <4 x i32> @abs_v4i32(<4 x i32> %a){
+; CHECK-LABEL: abs_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define <2 x i64> @abs_v2i64(<2 x i64> %a){
+; CHECK-LABEL: abs_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <4 x i8> @abs_v4i8(<4 x i8> %a){
+; CHECK-LABEL: abs_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %a, i1 0)
+  ret <4 x i8> %res
+}
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+define <32 x i8> @abs_v32i8(<32 x i8> %a){
+; CHECK-LABEL: abs_v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.16b, v0.16b
+; CHECK-NEXT:    abs v1.16b, v1.16b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+
+define <2 x i16> @abs_v2i16(<2 x i16> %a){
+; CHECK-LABEL: abs_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %a, i1 0)
+  ret <2 x i16> %res
+}
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+
+define <16 x i16> @abs_v16i16(<16 x i16> %a){
+; CHECK-LABEL: abs_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    abs v1.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+define <1 x i32> @abs_v1i32(<1 x i32> %a){
+; CHECK-SD-LABEL: abs_v1i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    abs v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v1i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    asr w9, w8, #31
+; CHECK-GI-NEXT:    add w8, w8, w9
+; CHECK-GI-NEXT:    eor w8, w8, w9
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)
+  ret <1 x i32> %res
+}
+declare <1 x i32> @llvm.abs.v1i32(<1 x i32>, i1)
+
+define <8 x i32> @abs_v8i32(<8 x i32> %a){
+; CHECK-LABEL: abs_v8i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 0)
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+define <4 x i64> @abs_v4i64(<4 x i64> %a){
+; CHECK-LABEL: abs_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.2d, v0.2d
+; CHECK-NEXT:    abs v1.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i8> @abs_v3i8(<3 x i8> %a){
+; CHECK-SD-LABEL: abs_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    abs v0.4h, v0.4h
+; CHECK-SD-NEXT:    umov w0, v0.h[0]
+; CHECK-SD-NEXT:    umov w1, v0.h[1]
+; CHECK-SD-NEXT:    umov w2, v0.h[2]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abs_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    fmov s1, w1
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    fmov s1, w2
+; CHECK-GI-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT:    mov v0.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[4], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[5], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[6], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[7], v0.b[0]
+; CHECK-GI-NEXT:    abs v0.8b, v0.8b
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
+  ret <3 x i8> %res
+}
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+
+define <7 x i8> @abs_v7i8(<7 x i8> %a){
+; CHECK-LABEL: abs_v7i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8b, v0.8b
+; CHECK-NEXT:    ret
+entry:
+  %res = call <7 x i8> @llvm.abs.v7i8(<7 x i8> %a, i1 0)
+  ret <7 x i8> %res
+}
+declare <7 x i8> @llvm.abs.v7i8(<7 x i8>, i1)
+
+define <3 x i16> @abs_v3i16(<3 x i16> %a){
+; CHECK-LABEL: abs_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %a, i1 0)
+  ret <3 x i16> %res
+}
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
+
+define <7 x i16> @abs_v7i16(<7 x i16> %a){
+; CHECK-LABEL: abs_v7i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.8h, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
+  ret <7 x i16> %res
+}
+declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
+
+define <3 x i32> @abs_v3i32(<3 x i32> %a){
+; CHECK-LABEL: abs_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
+  ret <3 x i32> %res
+}
+declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index ad82d2e7955c27d..1531154b8a03c2e 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4,7 +4,10 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-BASE
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - -mattr=+dotprod 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-DOT
 
-; CHECK-GI-BASE:        warning: Instruction selection used fallback path for full
+; CHECK-GI-BASE:        warning: Instruction selection used fallback path for test_udot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_udot_v48i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v24i8
+; CHECK-GI-BASE-NEXT:   warning: Instruction selection used fallback path for test_sdot_v48i8
 
 define i32 @addv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: addv_v2i32:
@@ -5177,115 +5180,104 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-DOT-NEXT:    fmov w0, s0
 ; CHECK-SD-DOT-NEXT:    ret
 ;
-; CHECK-GI-BASE-LABEL: full:
-; CHECK-GI-BASE:       // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT:    ldr d0, [x2]
-; CHECK-GI-BASE-NEXT:    ldr d1, [x0]
-; CHECK-GI-BASE-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-BASE-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-BASE-NEXT:    sxtw x8, w3
-; CHECK-GI-BASE-NEXT:    sxtw x9, w1
-; CHECK-GI-BASE-NEXT:    uabdl v0.8h, v1.8b, v0.8b
-; CHECK-GI-BASE-NEXT:    add x11, x2, x8
-; CHECK-GI-BASE-NEXT:    add x10, x0, x9
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uaddlp v0.4s, v0.8h
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    add x11, x11, x8
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    add x10, x10, x9
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    ldr d2, [x11, x8]
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-BASE-NEXT:    uabdl v1.8h, v1.8b, v2.8b
-; CHECK-GI-BASE-NEXT:    uadalp v0.4s, v1.8h
-; CHECK-GI-BASE-NEXT:    addv s0, v0.4s
-; CHECK-GI-BASE-NEXT:    fmov w0, s0
-; CHECK-GI-BASE-NEXT:    ret
-;
-; CHECK-GI-DOT-LABEL: full:
-; CHECK-GI-DOT:       // %bb.0: // %entry
-; CHECK-GI-DOT-NEXT:    ldr d0, [x0]
-; CHECK-GI-DOT-NEXT:    ldr d1, [x2]
-; CHECK-GI-DOT-NEXT:    // kill: def $w3 killed $w3 def $x3
-; CHECK-GI-DOT-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-DOT-NEXT:    sxtw x8, w3
-; CHECK-GI-DOT-NEXT:    sxtw x9, w1
-; CHECK-GI-DOT-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-GI-DOT-NEXT:    movi v3.8b, #1
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v0.8b, v1.8b
-; CHECK-GI-DOT-NEXT:    add x11, x2, x8
-; CHECK-GI-DOT-NEXT:    add x10, x0, x9
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    add x10, x10, x9
-; CHECK-GI-DOT-NEXT:    add x11, x11, x8
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    ldr d1, [x10, x9]
-; CHECK-GI-DOT-NEXT:    ldr d4, [x11, x8]
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    uabd v0.8b, v1.8b, v4.8b
-; CHECK-GI-DOT-NEXT:    udot v2.2s, v0.8b, v3.8b
-; CHECK-GI-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
-; CHECK-GI-DOT-NEXT:    fmov w0, s0
-; CHECK-GI-DOT-NEXT:    ret
+; CHECK-GI-LABEL: full:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-GI-NEXT:    sxtw x8, w1
+; CHECK-GI-NEXT:    sxtw x9, w3
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    ldr d1, [x2]
+; CHECK-GI-NEXT:    add x10, x0, x8
+; CHECK-GI-NEXT:    add x11, x2, x9
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    ldr d2, [x10]
+; CHECK-GI-NEXT:    ldr d3, [x11]
+; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT:    ushll v3.8h, v3.8b, #0
+; CHECK-GI-NEXT:    ldr d4, [x10]
+; CHECK-GI-NEXT:    ldr d5, [x11]
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v6.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    uabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ldr d1, [x10]
+; CHECK-GI-NEXT:    ushll v4.8h, v4.8b, #0
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    ldr d7, [x11]
+; CHECK-GI-NEXT:    uabdl v16.4s, v2.4h, v3.4h
+; CHECK-GI-NEXT:    uabdl2 v2.4s, v2.8h, v3.8h
+; CHECK-GI-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ushll v7.8h, v7.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    uabdl v1.4s, v4.4h, v5.4h
+; CHECK-GI-NEXT:    uabdl2 v4.4s, v4.8h, v5.8h
+; CHECK-GI-NEXT:    ldr d5, [x10]
+; CHECK-GI-NEXT:    add v2.4s, v16.4s, v2.4s
+; CHECK-GI-NEXT:    ldr d16, [x11]
+; CHECK-GI-NEXT:    add v0.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT:    uabdl v6.4s, v3.4h, v7.4h
+; CHECK-GI-NEXT:    uabdl2 v3.4s, v3.8h, v7.8h
+; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    ushll v7.8h, v16.8b, #0
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    ldr d16, [x10]
+; CHECK-GI-NEXT:    ldr d17, [x11]
+; CHECK-GI-NEXT:    add v1.4s, v1.4s, v4.4s
+; CHECK-GI-NEXT:    add x10, x10, x8
+; CHECK-GI-NEXT:    add x11, x11, x9
+; CHECK-GI-NEXT:    add v3.4s, v6.4s, v3.4s
+; CHECK-GI-NEXT:    ushll v16.8h, v16.8b, #0
+; CHECK-GI-NEXT:    ushll v17.8h, v17.8b, #0
+; CHECK-GI-NEXT:    uabdl v22.4s, v5.4h, v7.4h
+; CHECK-GI-NEXT:    uabdl2 v5.4s, v5.8h, v7.8h
+; CHECK-GI-NEXT:    ldr d18, [x10]
+; CHECK-GI-NEXT:    ldr d19, [x11]
+; CHECK-GI-NEXT:    addv s0, v0.4s
+; CHECK-GI-NEXT:    addv s2, v2.4s
+; CHECK-GI-NEXT:    addv s1, v1.4s
+; CHECK-GI-NEXT:    ushll v18.8h, v18.8b, #0
+; CHECK-GI-NEXT:    ushll v19.8h, v19.8b, #0
+; CHECK-GI-NEXT:    uabdl v4.4s, v16.4h, v17.4h
+; CHECK-GI-NEXT:    uabdl2 v16.4s, v16.8h, v17.8h
+; CHECK-GI-NEXT:    add v5.4s, v22.4s, v5.4s
+; CHECK-GI-NEXT:    ldr d20, [x10, x8]
+; CHECK-GI-NEXT:    ldr d21, [x11, x9]
+; CHECK-GI-NEXT:    addv s3, v3.4s
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    fmov w9, s0
+; CHECK-GI-NEXT:    ushll v7.8h, v20.8b, #0
+; CHECK-GI-NEXT:    ushll v20.8h, v21.8b, #0
+; CHECK-GI-NEXT:    uabdl v6.4s, v18.4h, v19.4h
+; CHECK-GI-NEXT:    uabdl2 v17.4s, v18.8h, v19.8h
+; CHECK-GI-NEXT:    add v4.4s, v4.4s, v16.4s
+; CHECK-GI-NEXT:    addv s5, v5.4s
+; CHECK-GI-NEXT:    fmov w10, s1
+; CHECK-GI-NEXT:    add w8, w8, w9
+; CHECK-GI-NEXT:    fmov w9, s3
+; CHECK-GI-NEXT:    uabdl v18.4s, v7.4h, v20.4h
+; CHECK-GI-NEXT:    uabdl2 v7.4s, v7.8h, v20.8h
+; CHECK-GI-NEXT:    add v6.4s, v6.4s, v17.4s
+; CHECK-GI-NEXT:    add w8, w10, w8
+; CHECK-GI-NEXT:    addv s0, v4.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s5
+; CHECK-GI-NEXT:    add v7.4s, v18.4s, v7.4s
+; CHECK-GI-NEXT:    addv s1, v6.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s0
+; CHECK-GI-NEXT:    addv s2, v7.4s
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    add w8, w9, w8
+; CHECK-GI-NEXT:    fmov w9, s2
+; CHECK-GI-NEXT:    add w0, w9, w8
+; CHECK-GI-NEXT:    ret
 entry:
   %idx.ext8 = sext i32 %s2 to i64
   %idx.ext = sext i32 %s1 to i64


        


More information about the llvm-commits mailing list