[llvm] 7582308 - [AArch64][GISel] Scalarize i128 vector shifts.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 10:44:30 PDT 2024
Author: David Green
Date: 2024-09-13T18:44:25+01:00
New Revision: 758230827d59ab312515e7ad9e6d25b799dedd46
URL: https://github.com/llvm/llvm-project/commit/758230827d59ab312515e7ad9e6d25b799dedd46
DIFF: https://github.com/llvm/llvm-project/commit/758230827d59ab312515e7ad9e6d25b799dedd46.diff
LOG: [AArch64][GISel] Scalarize i128 vector shifts.
Like most other i128 operations, this adds scalarization for i128 vector
shifts, which in turn allows a few other operations to legalize too.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/abs.ll
llvm/test/CodeGen/AArch64/fcmp.ll
llvm/test/CodeGen/AArch64/shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index db5cd1d32d73d0..623e59c4be8053 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -179,7 +179,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0)
- .minScalarSameAs(1, 0);
+ .minScalarSameAs(1, 0)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}, {v2p0, v2s64}})
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 6da019a79b7277..25a14ef9a49ee8 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -280,6 +280,40 @@ entry:
}
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+define <2 x i128> @abs_v4i128(<2 x i128> %a){
+; CHECK-SD-LABEL: abs_v4i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: asr x8, x1, #63
+; CHECK-SD-NEXT: asr x9, x3, #63
+; CHECK-SD-NEXT: eor x10, x0, x8
+; CHECK-SD-NEXT: eor x11, x1, x8
+; CHECK-SD-NEXT: subs x0, x10, x8
+; CHECK-SD-NEXT: eor x10, x2, x9
+; CHECK-SD-NEXT: sbc x1, x11, x8
+; CHECK-SD-NEXT: eor x8, x3, x9
+; CHECK-SD-NEXT: subs x2, x10, x9
+; CHECK-SD-NEXT: sbc x3, x8, x9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abs_v4i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: asr x8, x1, #63
+; CHECK-GI-NEXT: asr x9, x3, #63
+; CHECK-GI-NEXT: adds x10, x0, x8
+; CHECK-GI-NEXT: adc x11, x1, x8
+; CHECK-GI-NEXT: adds x12, x2, x9
+; CHECK-GI-NEXT: eor x0, x10, x8
+; CHECK-GI-NEXT: adc x13, x3, x9
+; CHECK-GI-NEXT: eor x1, x11, x8
+; CHECK-GI-NEXT: eor x2, x12, x9
+; CHECK-GI-NEXT: eor x3, x13, x9
+; CHECK-GI-NEXT: ret
+entry:
+ %res = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %a, i1 0)
+ ret <2 x i128> %res
+}
+declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)
+
; ===== Vectors with Non-Pow 2 Widths =====
define <3 x i8> @abs_v3i8(<3 x i8> %a){
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 8ca1e9ee5b6178..5e44da5fcfa2d8 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -1,11 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI: warning: Instruction selection used fallback path for v2f128_fp128
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f128_fp128
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) {
; CHECK-SD-LABEL: f128_fp128:
@@ -429,35 +426,90 @@ entry:
}
define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, <2 x fp128> %e) {
-; CHECK-LABEL: v2f128_fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.ge .LBB12_2
-; CHECK-NEXT: // %bb.1: // %entry
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: .LBB12_2: // %entry
-; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.ge .LBB12_4
-; CHECK-NEXT: // %bb.3: // %entry
-; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB12_4: // %entry
-; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #112
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2f128_fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #112
+; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: b.ge .LBB12_2
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .LBB12_2: // %entry
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: b.ge .LBB12_4
+; CHECK-SD-NEXT: // %bb.3: // %entry
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: .LBB12_4: // %entry
+; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #112
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2f128_fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #112
+; CHECK-GI-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: stp q4, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, lt
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: bfi x19, x8, #32, #32
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov x11, v0.d[1]
+; CHECK-GI-NEXT: bfi x8, x8, #32, #32
+; CHECK-GI-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT: lsl x9, x19, #63
+; CHECK-GI-NEXT: lsl x8, x8, #63
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT: asr x9, x9, #63
+; CHECK-GI-NEXT: fmov x12, d0
+; CHECK-GI-NEXT: mov x13, v0.d[1]
+; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: fmov x14, d1
+; CHECK-GI-NEXT: asr x8, x8, #63
+; CHECK-GI-NEXT: and x10, x10, x9
+; CHECK-GI-NEXT: fmov x15, d0
+; CHECK-GI-NEXT: mov x16, v1.d[1]
+; CHECK-GI-NEXT: mov x17, v0.d[1]
+; CHECK-GI-NEXT: and x12, x12, x8
+; CHECK-GI-NEXT: bic x14, x14, x9
+; CHECK-GI-NEXT: bic x15, x15, x8
+; CHECK-GI-NEXT: orr x10, x10, x14
+; CHECK-GI-NEXT: orr x12, x12, x15
+; CHECK-GI-NEXT: mov v0.d[0], x10
+; CHECK-GI-NEXT: and x10, x11, x9
+; CHECK-GI-NEXT: mov v1.d[0], x12
+; CHECK-GI-NEXT: and x11, x13, x8
+; CHECK-GI-NEXT: bic x9, x16, x9
+; CHECK-GI-NEXT: bic x8, x17, x8
+; CHECK-GI-NEXT: orr x9, x10, x9
+; CHECK-GI-NEXT: orr x8, x11, x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: mov v1.d[1], x8
+; CHECK-GI-NEXT: add sp, sp, #112
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt <2 x fp128> %a, %b
%s = select <2 x i1> %c, <2 x fp128> %d, <2 x fp128> %e
@@ -465,42 +517,129 @@ entry:
}
define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, <3 x fp128> %e) {
-; CHECK-LABEL: v3f128_fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 112
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v3.16b
-; CHECK-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill
-; CHECK-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.lt .LBB13_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: ldr q0, [sp, #128]
-; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .LBB13_2: // %entry
-; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.lt .LBB13_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: ldr q0, [sp, #144]
-; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: .LBB13_4: // %entry
-; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: add x8, sp, #160
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: add x9, sp, #112
-; CHECK-NEXT: csel x8, x9, x8, lt
-; CHECK-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload
-; CHECK-NEXT: ldr q2, [x8]
-; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #112
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v3f128_fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #112
+; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: b.lt .LBB13_2
+; CHECK-SD-NEXT: // %bb.1:
+; CHECK-SD-NEXT: ldr q0, [sp, #128]
+; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .LBB13_2: // %entry
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: b.lt .LBB13_4
+; CHECK-SD-NEXT: // %bb.3:
+; CHECK-SD-NEXT: ldr q0, [sp, #144]
+; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .LBB13_4: // %entry
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: add x8, sp, #160
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: add x9, sp, #112
+; CHECK-SD-NEXT: csel x8, x9, x8, lt
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ldr q2, [x8]
+; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #112
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3f128_fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #192
+; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 192
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: stp q4, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT: ldr q2, [sp, #192]
+; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-GI-NEXT: ldr q2, [sp, #208]
+; CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q2, [sp, #224]
+; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q2, [sp, #240]
+; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, lt
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w20, lt
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bfi x19, x8, #32, #32
+; CHECK-GI-NEXT: bfi x20, x8, #32, #32
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: lsl x13, x19, #63
+; CHECK-GI-NEXT: lsl x14, x20, #63
+; CHECK-GI-NEXT: fmov x11, d0
+; CHECK-GI-NEXT: mov x12, v0.d[1]
+; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bfi x9, x8, #32, #32
+; CHECK-GI-NEXT: asr x13, x13, #63
+; CHECK-GI-NEXT: asr x14, x14, #63
+; CHECK-GI-NEXT: fmov x15, d0
+; CHECK-GI-NEXT: mov x16, v0.d[1]
+; CHECK-GI-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
+; CHECK-GI-NEXT: lsl x9, x9, #63
+; CHECK-GI-NEXT: and x8, x8, x13
+; CHECK-GI-NEXT: and x11, x11, x14
+; CHECK-GI-NEXT: asr x9, x9, #63
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-GI-NEXT: fmov x17, d0
+; CHECK-GI-NEXT: mov x18, v0.d[1]
+; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT: fmov x0, d1
+; CHECK-GI-NEXT: and x15, x15, x9
+; CHECK-GI-NEXT: mov x2, v1.d[1]
+; CHECK-GI-NEXT: fmov x1, d0
+; CHECK-GI-NEXT: mov x3, v0.d[1]
+; CHECK-GI-NEXT: bic x17, x17, x13
+; CHECK-GI-NEXT: bic x0, x0, x14
+; CHECK-GI-NEXT: orr x8, x8, x17
+; CHECK-GI-NEXT: bic x1, x1, x9
+; CHECK-GI-NEXT: orr x11, x11, x0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: orr x15, x15, x1
+; CHECK-GI-NEXT: mov v1.d[0], x11
+; CHECK-GI-NEXT: and x8, x10, x13
+; CHECK-GI-NEXT: mov v2.d[0], x15
+; CHECK-GI-NEXT: and x10, x12, x14
+; CHECK-GI-NEXT: and x11, x16, x9
+; CHECK-GI-NEXT: bic x12, x18, x13
+; CHECK-GI-NEXT: bic x13, x2, x14
+; CHECK-GI-NEXT: bic x9, x3, x9
+; CHECK-GI-NEXT: orr x8, x8, x12
+; CHECK-GI-NEXT: orr x10, x10, x13
+; CHECK-GI-NEXT: orr x9, x11, x9
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x10
+; CHECK-GI-NEXT: mov v2.d[1], x9
+; CHECK-GI-NEXT: add sp, sp, #192
+; CHECK-GI-NEXT: ret
entry:
%c = fcmp olt <3 x fp128> %a, %b
%s = select <3 x i1> %c, <3 x fp128> %d, <3 x fp128> %e
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 951458da17c07e..7014a4a9acbe03 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i1 @shl_i1(i1 %0, i1 %1){
@@ -674,6 +674,61 @@ define <4 x i64> @shl_v4i64(<4 x i64> %0, <4 x i64> %1){
ret <4 x i64> %3
}
+define <2 x i128> @shl_v2i128(<2 x i128> %0, <2 x i128> %1){
+; CHECK-SD-LABEL: shl_v2i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, #1
+; CHECK-SD-NEXT: mvn w9, w4
+; CHECK-SD-NEXT: lsl x10, x1, x4
+; CHECK-SD-NEXT: mvn w12, w6
+; CHECK-SD-NEXT: lsl x11, x0, x4
+; CHECK-SD-NEXT: lsl x13, x3, x6
+; CHECK-SD-NEXT: lsr x8, x8, x9
+; CHECK-SD-NEXT: lsr x9, x2, #1
+; CHECK-SD-NEXT: tst x4, #0x40
+; CHECK-SD-NEXT: csel x0, xzr, x11, ne
+; CHECK-SD-NEXT: lsr x9, x9, x12
+; CHECK-SD-NEXT: orr x8, x10, x8
+; CHECK-SD-NEXT: lsl x10, x2, x6
+; CHECK-SD-NEXT: csel x1, x11, x8, ne
+; CHECK-SD-NEXT: tst x6, #0x40
+; CHECK-SD-NEXT: orr x8, x13, x9
+; CHECK-SD-NEXT: csel x2, xzr, x10, ne
+; CHECK-SD-NEXT: csel x3, x10, x8, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shl_v2i128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: sub x10, x4, #64
+; CHECK-GI-NEXT: lsl x11, x1, x4
+; CHECK-GI-NEXT: sub x9, x8, x4
+; CHECK-GI-NEXT: lsl x10, x0, x10
+; CHECK-GI-NEXT: lsl x12, x0, x4
+; CHECK-GI-NEXT: lsr x9, x0, x9
+; CHECK-GI-NEXT: cmp x4, #64
+; CHECK-GI-NEXT: sub x8, x8, x6
+; CHECK-GI-NEXT: lsr x8, x2, x8
+; CHECK-GI-NEXT: csel x0, x12, xzr, lo
+; CHECK-GI-NEXT: lsl x12, x2, x6
+; CHECK-GI-NEXT: orr x9, x9, x11
+; CHECK-GI-NEXT: lsl x11, x3, x6
+; CHECK-GI-NEXT: csel x9, x9, x10, lo
+; CHECK-GI-NEXT: sub x10, x6, #64
+; CHECK-GI-NEXT: cmp x4, #0
+; CHECK-GI-NEXT: lsl x10, x2, x10
+; CHECK-GI-NEXT: csel x1, x1, x9, eq
+; CHECK-GI-NEXT: orr x8, x8, x11
+; CHECK-GI-NEXT: cmp x6, #64
+; CHECK-GI-NEXT: csel x2, x12, xzr, lo
+; CHECK-GI-NEXT: csel x8, x8, x10, lo
+; CHECK-GI-NEXT: cmp x6, #0
+; CHECK-GI-NEXT: csel x3, x3, x8, eq
+; CHECK-GI-NEXT: ret
+ %3 = shl <2 x i128> %0, %1
+ ret <2 x i128> %3
+}
+
define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){
; CHECK-SD-LABEL: ashr_v4i8:
; CHECK-SD: // %bb.0:
@@ -819,6 +874,67 @@ define <4 x i64> @ashr_v4i64(<4 x i64> %0, <4 x i64> %1){
ret <4 x i64> %3
}
+define <2 x i128> @ashr_v2i128(<2 x i128> %0, <2 x i128> %1){
+; CHECK-SD-LABEL: ashr_v2i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x1, #1
+; CHECK-SD-NEXT: mvn w9, w4
+; CHECK-SD-NEXT: lsl x10, x3, #1
+; CHECK-SD-NEXT: lsr x11, x0, x4
+; CHECK-SD-NEXT: lsr x12, x2, x6
+; CHECK-SD-NEXT: asr x13, x1, #63
+; CHECK-SD-NEXT: lsl x8, x8, x9
+; CHECK-SD-NEXT: mvn w9, w6
+; CHECK-SD-NEXT: tst x4, #0x40
+; CHECK-SD-NEXT: lsl x9, x10, x9
+; CHECK-SD-NEXT: asr x10, x1, x4
+; CHECK-SD-NEXT: asr x14, x3, #63
+; CHECK-SD-NEXT: orr x8, x8, x11
+; CHECK-SD-NEXT: asr x11, x3, x6
+; CHECK-SD-NEXT: csel x0, x10, x8, ne
+; CHECK-SD-NEXT: orr x8, x9, x12
+; CHECK-SD-NEXT: csel x1, x13, x10, ne
+; CHECK-SD-NEXT: tst x6, #0x40
+; CHECK-SD-NEXT: csel x2, x11, x8, ne
+; CHECK-SD-NEXT: csel x3, x14, x11, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ashr_v2i128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: sub x10, x4, #64
+; CHECK-GI-NEXT: lsr x11, x0, x4
+; CHECK-GI-NEXT: sub x9, x8, x4
+; CHECK-GI-NEXT: asr x10, x1, x10
+; CHECK-GI-NEXT: cmp x4, #64
+; CHECK-GI-NEXT: lsl x9, x1, x9
+; CHECK-GI-NEXT: sub x8, x8, x6
+; CHECK-GI-NEXT: asr x12, x1, x4
+; CHECK-GI-NEXT: lsl x8, x3, x8
+; CHECK-GI-NEXT: orr x9, x11, x9
+; CHECK-GI-NEXT: asr x11, x1, #63
+; CHECK-GI-NEXT: csel x9, x9, x10, lo
+; CHECK-GI-NEXT: cmp x4, #0
+; CHECK-GI-NEXT: lsr x10, x2, x6
+; CHECK-GI-NEXT: csel x0, x0, x9, eq
+; CHECK-GI-NEXT: sub x9, x6, #64
+; CHECK-GI-NEXT: cmp x4, #64
+; CHECK-GI-NEXT: asr x9, x3, x9
+; CHECK-GI-NEXT: csel x1, x12, x11, lo
+; CHECK-GI-NEXT: orr x8, x10, x8
+; CHECK-GI-NEXT: cmp x6, #64
+; CHECK-GI-NEXT: asr x11, x3, x6
+; CHECK-GI-NEXT: asr x10, x3, #63
+; CHECK-GI-NEXT: csel x8, x8, x9, lo
+; CHECK-GI-NEXT: cmp x6, #0
+; CHECK-GI-NEXT: csel x2, x2, x8, eq
+; CHECK-GI-NEXT: cmp x6, #64
+; CHECK-GI-NEXT: csel x3, x11, x10, lo
+; CHECK-GI-NEXT: ret
+ %3 = ashr <2 x i128> %0, %1
+ ret <2 x i128> %3
+}
+
define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){
; CHECK-SD-LABEL: lshr_v4i8:
; CHECK-SD: // %bb.0:
@@ -962,6 +1078,63 @@ define <4 x i64> @lshr_v4i64(<4 x i64> %0, <4 x i64> %1){
ret <4 x i64> %3
}
+define <2 x i128> @lshr_v2i128(<2 x i128> %0, <2 x i128> %1){
+; CHECK-SD-LABEL: lshr_v2i128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x1, #1
+; CHECK-SD-NEXT: mvn w9, w4
+; CHECK-SD-NEXT: lsr x10, x0, x4
+; CHECK-SD-NEXT: mvn w12, w6
+; CHECK-SD-NEXT: lsr x11, x1, x4
+; CHECK-SD-NEXT: lsr x13, x2, x6
+; CHECK-SD-NEXT: lsl x8, x8, x9
+; CHECK-SD-NEXT: lsl x9, x3, #1
+; CHECK-SD-NEXT: tst x4, #0x40
+; CHECK-SD-NEXT: csel x1, xzr, x11, ne
+; CHECK-SD-NEXT: lsl x9, x9, x12
+; CHECK-SD-NEXT: orr x8, x8, x10
+; CHECK-SD-NEXT: lsr x10, x3, x6
+; CHECK-SD-NEXT: csel x0, x11, x8, ne
+; CHECK-SD-NEXT: tst x6, #0x40
+; CHECK-SD-NEXT: orr x8, x9, x13
+; CHECK-SD-NEXT: csel x3, xzr, x10, ne
+; CHECK-SD-NEXT: csel x2, x10, x8, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: lshr_v2i128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: sub x10, x4, #64
+; CHECK-GI-NEXT: lsr x11, x0, x4
+; CHECK-GI-NEXT: sub x9, x8, x4
+; CHECK-GI-NEXT: lsr x10, x1, x10
+; CHECK-GI-NEXT: cmp x4, #64
+; CHECK-GI-NEXT: lsl x9, x1, x9
+; CHECK-GI-NEXT: sub x8, x8, x6
+; CHECK-GI-NEXT: lsr x12, x1, x4
+; CHECK-GI-NEXT: lsl x8, x3, x8
+; CHECK-GI-NEXT: orr x9, x11, x9
+; CHECK-GI-NEXT: lsr x11, x2, x6
+; CHECK-GI-NEXT: csel x9, x9, x10, lo
+; CHECK-GI-NEXT: cmp x4, #0
+; CHECK-GI-NEXT: sub x10, x6, #64
+; CHECK-GI-NEXT: csel x0, x0, x9, eq
+; CHECK-GI-NEXT: cmp x4, #64
+; CHECK-GI-NEXT: lsr x9, x3, x10
+; CHECK-GI-NEXT: csel x1, x12, xzr, lo
+; CHECK-GI-NEXT: orr x8, x11, x8
+; CHECK-GI-NEXT: cmp x6, #64
+; CHECK-GI-NEXT: lsr x10, x3, x6
+; CHECK-GI-NEXT: csel x8, x8, x9, lo
+; CHECK-GI-NEXT: cmp x6, #0
+; CHECK-GI-NEXT: csel x2, x2, x8, eq
+; CHECK-GI-NEXT: cmp x6, #64
+; CHECK-GI-NEXT: csel x3, x10, xzr, lo
+; CHECK-GI-NEXT: ret
+ %3 = lshr <2 x i128> %0, %1
+ ret <2 x i128> %3
+}
+
; ===== Vector with Non-Pow 2 Width =====
define <3 x i8> @shl_v3i8(<3 x i8> %0, <3 x i8> %1){
More information about the llvm-commits
mailing list