[llvm] d4a6995 - [AArch64][GlobalISel] Legalize large G_SEXT_INREG
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 1 07:28:13 PST 2024
Author: David Green
Date: 2024-01-01T15:28:08Z
New Revision: d4a6995e9438c72b4bb2054ca8ae7e0016f97682
URL: https://github.com/llvm/llvm-project/commit/d4a6995e9438c72b4bb2054ca8ae7e0016f97682
DIFF: https://github.com/llvm/llvm-project/commit/d4a6995e9438c72b4bb2054ca8ae7e0016f97682.diff
LOG: [AArch64][GlobalISel] Legalize large G_SEXT_INREG
These come from the legalization of other operations, but it makes sense to
split the operations into legal sizes before lowering them.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/aarch64-addv.ll
llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
llvm/test/CodeGen/AArch64/arm64-vabs.ll
llvm/test/CodeGen/AArch64/fcmp.ll
llvm/test/CodeGen/AArch64/icmp.ll
llvm/test/CodeGen/AArch64/itofp.ll
llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
llvm/test/CodeGen/AArch64/sext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4eccaa5be3cf19..1d0e8be80d078c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -623,6 +623,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({s32, s64})
.legalFor(PackedVectorAllTypeList)
.maxScalar(0, s64)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2)
.lower();
// FP conversions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 7b09a4cc7b8a27..5b78b0d4773109 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -23,14 +23,14 @@ declare i64 @llvm.vector.reduce.add.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128>)
-; GISEL: warning: Instruction selection used fallback path for addv_v2i8
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i8
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v4i8
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i16
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i16
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i32
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i64
-; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i128
+; GISEL: warning: Instruction selection used fallback path for addv_v2i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v4i8
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i16
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i16
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i32
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v3i64
+; GISEL-NEXT: warning: Instruction selection used fallback path for addv_v2i128
define i8 @add_B(ptr %arr) {
@@ -101,16 +101,12 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
; GISEL-NEXT: ushll v2.8h, v2.8b, #0
; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
-; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
+; GISEL-NEXT: neg v2.4s, v3.4s
+; GISEL-NEXT: neg v4.4s, v1.4s
+; GISEL-NEXT: cmgt v5.4s, v0.4s, v3.4s
; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; GISEL-NEXT: neg v4.4s, v3.4s
-; GISEL-NEXT: neg v5.4s, v1.4s
-; GISEL-NEXT: shl v2.4s, v2.4s, #31
-; GISEL-NEXT: shl v0.4s, v0.4s, #31
-; GISEL-NEXT: sshr v2.4s, v2.4s, #31
-; GISEL-NEXT: sshr v0.4s, v0.4s, #31
-; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
-; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
+; GISEL-NEXT: bif v2.16b, v3.16b, v5.16b
+; GISEL-NEXT: bsl v0.16b, v4.16b, v1.16b
; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
; GISEL-NEXT: addv s0, v0.4s
; GISEL-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 51f8c2ceceecb0..00cc6b21ccaf8b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -5,10 +5,8 @@
; Test efficient codegen of vector extends up from legal type to 128 bit
; and 256 bit vector types.
-; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v32i1
+; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v64i1
;-----
; Vectors of i16.
@@ -402,69 +400,133 @@ define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
}
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
-; CHECK-LABEL: sext_v32i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [sp, #64]
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: ldr w9, [sp, #72]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldr w8, [sp, #80]
-; CHECK-NEXT: mov.b v1[1], w1
-; CHECK-NEXT: mov.b v0[1], w9
-; CHECK-NEXT: ldr w9, [sp]
-; CHECK-NEXT: mov.b v1[2], w2
-; CHECK-NEXT: mov.b v0[2], w8
-; CHECK-NEXT: ldr w8, [sp, #88]
-; CHECK-NEXT: mov.b v1[3], w3
-; CHECK-NEXT: mov.b v0[3], w8
-; CHECK-NEXT: ldr w8, [sp, #96]
-; CHECK-NEXT: mov.b v1[4], w4
-; CHECK-NEXT: mov.b v0[4], w8
-; CHECK-NEXT: ldr w8, [sp, #104]
-; CHECK-NEXT: mov.b v1[5], w5
-; CHECK-NEXT: mov.b v0[5], w8
-; CHECK-NEXT: ldr w8, [sp, #112]
-; CHECK-NEXT: mov.b v1[6], w6
-; CHECK-NEXT: mov.b v0[6], w8
-; CHECK-NEXT: ldr w8, [sp, #120]
-; CHECK-NEXT: mov.b v1[7], w7
-; CHECK-NEXT: mov.b v0[7], w8
-; CHECK-NEXT: ldr w8, [sp, #128]
-; CHECK-NEXT: mov.b v1[8], w9
-; CHECK-NEXT: ldr w9, [sp, #8]
-; CHECK-NEXT: mov.b v0[8], w8
-; CHECK-NEXT: ldr w8, [sp, #136]
-; CHECK-NEXT: mov.b v1[9], w9
-; CHECK-NEXT: ldr w9, [sp, #16]
-; CHECK-NEXT: mov.b v0[9], w8
-; CHECK-NEXT: ldr w8, [sp, #144]
-; CHECK-NEXT: mov.b v1[10], w9
-; CHECK-NEXT: ldr w9, [sp, #24]
-; CHECK-NEXT: mov.b v0[10], w8
-; CHECK-NEXT: ldr w8, [sp, #152]
-; CHECK-NEXT: mov.b v1[11], w9
-; CHECK-NEXT: ldr w9, [sp, #32]
-; CHECK-NEXT: mov.b v0[11], w8
-; CHECK-NEXT: ldr w8, [sp, #160]
-; CHECK-NEXT: mov.b v1[12], w9
-; CHECK-NEXT: ldr w9, [sp, #40]
-; CHECK-NEXT: mov.b v0[12], w8
-; CHECK-NEXT: ldr w8, [sp, #168]
-; CHECK-NEXT: mov.b v1[13], w9
-; CHECK-NEXT: ldr w9, [sp, #48]
-; CHECK-NEXT: mov.b v0[13], w8
-; CHECK-NEXT: ldr w8, [sp, #176]
-; CHECK-NEXT: mov.b v1[14], w9
-; CHECK-NEXT: ldr w9, [sp, #56]
-; CHECK-NEXT: mov.b v0[14], w8
-; CHECK-NEXT: ldr w8, [sp, #184]
-; CHECK-NEXT: mov.b v1[15], w9
-; CHECK-NEXT: mov.b v0[15], w8
-; CHECK-NEXT: shl.16b v1, v1, #7
-; CHECK-NEXT: shl.16b v2, v0, #7
-; CHECK-NEXT: cmlt.16b v0, v1, #0
-; CHECK-NEXT: cmlt.16b v1, v2, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v32i1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr w8, [sp, #64]
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: ldr w9, [sp, #72]
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: ldr w8, [sp, #80]
+; CHECK-SD-NEXT: mov.b v1[1], w1
+; CHECK-SD-NEXT: mov.b v0[1], w9
+; CHECK-SD-NEXT: ldr w9, [sp]
+; CHECK-SD-NEXT: mov.b v1[2], w2
+; CHECK-SD-NEXT: mov.b v0[2], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #88]
+; CHECK-SD-NEXT: mov.b v1[3], w3
+; CHECK-SD-NEXT: mov.b v0[3], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #96]
+; CHECK-SD-NEXT: mov.b v1[4], w4
+; CHECK-SD-NEXT: mov.b v0[4], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #104]
+; CHECK-SD-NEXT: mov.b v1[5], w5
+; CHECK-SD-NEXT: mov.b v0[5], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #112]
+; CHECK-SD-NEXT: mov.b v1[6], w6
+; CHECK-SD-NEXT: mov.b v0[6], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #120]
+; CHECK-SD-NEXT: mov.b v1[7], w7
+; CHECK-SD-NEXT: mov.b v0[7], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #128]
+; CHECK-SD-NEXT: mov.b v1[8], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #8]
+; CHECK-SD-NEXT: mov.b v0[8], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #136]
+; CHECK-SD-NEXT: mov.b v1[9], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #16]
+; CHECK-SD-NEXT: mov.b v0[9], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #144]
+; CHECK-SD-NEXT: mov.b v1[10], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #24]
+; CHECK-SD-NEXT: mov.b v0[10], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #152]
+; CHECK-SD-NEXT: mov.b v1[11], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #32]
+; CHECK-SD-NEXT: mov.b v0[11], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #160]
+; CHECK-SD-NEXT: mov.b v1[12], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #40]
+; CHECK-SD-NEXT: mov.b v0[12], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #168]
+; CHECK-SD-NEXT: mov.b v1[13], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #48]
+; CHECK-SD-NEXT: mov.b v0[13], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #176]
+; CHECK-SD-NEXT: mov.b v1[14], w9
+; CHECK-SD-NEXT: ldr w9, [sp, #56]
+; CHECK-SD-NEXT: mov.b v0[14], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #184]
+; CHECK-SD-NEXT: mov.b v1[15], w9
+; CHECK-SD-NEXT: mov.b v0[15], w8
+; CHECK-SD-NEXT: shl.16b v1, v1, #7
+; CHECK-SD-NEXT: shl.16b v2, v0, #7
+; CHECK-SD-NEXT: cmlt.16b v0, v1, #0
+; CHECK-SD-NEXT: cmlt.16b v1, v2, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v32i1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s17, w0
+; CHECK-GI-NEXT: fmov s19, w4
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: ldr s21, [sp, #8]
+; CHECK-GI-NEXT: ldr s1, [sp, #32]
+; CHECK-GI-NEXT: ldr s22, [sp, #40]
+; CHECK-GI-NEXT: ldr s2, [sp, #64]
+; CHECK-GI-NEXT: ldr s23, [sp, #72]
+; CHECK-GI-NEXT: ldr s3, [sp, #96]
+; CHECK-GI-NEXT: ldr s24, [sp, #104]
+; CHECK-GI-NEXT: mov.s v17[1], w1
+; CHECK-GI-NEXT: mov.s v19[1], w5
+; CHECK-GI-NEXT: ldr s5, [sp, #128]
+; CHECK-GI-NEXT: ldr s20, [sp, #136]
+; CHECK-GI-NEXT: mov.s v0[1], v21[0]
+; CHECK-GI-NEXT: ldr s7, [sp, #160]
+; CHECK-GI-NEXT: ldr s25, [sp, #168]
+; CHECK-GI-NEXT: mov.s v1[1], v22[0]
+; CHECK-GI-NEXT: mov.s v2[1], v23[0]
+; CHECK-GI-NEXT: mov.s v3[1], v24[0]
+; CHECK-GI-NEXT: mov.s v5[1], v20[0]
+; CHECK-GI-NEXT: mov.s v7[1], v25[0]
+; CHECK-GI-NEXT: ldr s16, [sp, #16]
+; CHECK-GI-NEXT: ldr s18, [sp, #48]
+; CHECK-GI-NEXT: ldr s20, [sp, #80]
+; CHECK-GI-NEXT: ldr s21, [sp, #112]
+; CHECK-GI-NEXT: ldr s22, [sp, #144]
+; CHECK-GI-NEXT: ldr s23, [sp, #176]
+; CHECK-GI-NEXT: mov.s v17[2], w2
+; CHECK-GI-NEXT: mov.s v19[2], w6
+; CHECK-GI-NEXT: mov.s v0[2], v16[0]
+; CHECK-GI-NEXT: mov.s v1[2], v18[0]
+; CHECK-GI-NEXT: mov.s v2[2], v20[0]
+; CHECK-GI-NEXT: mov.s v3[2], v21[0]
+; CHECK-GI-NEXT: mov.s v5[2], v22[0]
+; CHECK-GI-NEXT: mov.s v7[2], v23[0]
+; CHECK-GI-NEXT: ldr s4, [sp, #24]
+; CHECK-GI-NEXT: ldr s6, [sp, #56]
+; CHECK-GI-NEXT: ldr s16, [sp, #88]
+; CHECK-GI-NEXT: ldr s18, [sp, #120]
+; CHECK-GI-NEXT: ldr s20, [sp, #152]
+; CHECK-GI-NEXT: ldr s21, [sp, #184]
+; CHECK-GI-NEXT: mov.s v17[3], w3
+; CHECK-GI-NEXT: mov.s v19[3], w7
+; CHECK-GI-NEXT: mov.s v0[3], v4[0]
+; CHECK-GI-NEXT: mov.s v1[3], v6[0]
+; CHECK-GI-NEXT: mov.s v2[3], v16[0]
+; CHECK-GI-NEXT: mov.s v3[3], v18[0]
+; CHECK-GI-NEXT: mov.s v5[3], v20[0]
+; CHECK-GI-NEXT: mov.s v7[3], v21[0]
+; CHECK-GI-NEXT: uzp1.8h v4, v17, v19
+; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
+; CHECK-GI-NEXT: uzp1.8h v1, v2, v3
+; CHECK-GI-NEXT: uzp1.8h v2, v5, v7
+; CHECK-GI-NEXT: uzp1.16b v0, v4, v0
+; CHECK-GI-NEXT: uzp1.16b v1, v1, v2
+; CHECK-GI-NEXT: shl.16b v0, v0, #7
+; CHECK-GI-NEXT: shl.16b v1, v1, #7
+; CHECK-GI-NEXT: sshr.16b v0, v0, #7
+; CHECK-GI-NEXT: sshr.16b v1, v1, #7
+; CHECK-GI-NEXT: ret
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
}
@@ -607,141 +669,279 @@ define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
}
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
-; CHECK-LABEL: sext_v64i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ldr w8, [sp, #336]
-; CHECK-NEXT: ldr w9, [sp, #208]
-; CHECK-NEXT: fmov s2, w0
-; CHECK-NEXT: ldr w10, [sp, #80]
-; CHECK-NEXT: ldr w11, [sp, #216]
-; CHECK-NEXT: ldr w12, [sp, #88]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: ldr w8, [sp, #344]
-; CHECK-NEXT: fmov s3, w10
-; CHECK-NEXT: mov.b v2[1], w1
-; CHECK-NEXT: ldr w9, [sp, #224]
-; CHECK-NEXT: ldr w10, [sp, #96]
-; CHECK-NEXT: mov.b v0[1], w8
-; CHECK-NEXT: mov.b v1[1], w11
-; CHECK-NEXT: ldr w8, [sp, #352]
-; CHECK-NEXT: mov.b v3[1], w12
-; CHECK-NEXT: ldr w11, [sp, #144]
-; CHECK-NEXT: mov.b v2[2], w2
-; CHECK-NEXT: mov.b v0[2], w8
-; CHECK-NEXT: mov.b v1[2], w9
-; CHECK-NEXT: ldr w8, [sp, #360]
-; CHECK-NEXT: mov.b v3[2], w10
-; CHECK-NEXT: ldr w9, [sp, #232]
-; CHECK-NEXT: ldr w10, [sp, #104]
-; CHECK-NEXT: mov.b v2[3], w3
-; CHECK-NEXT: mov.b v0[3], w8
-; CHECK-NEXT: mov.b v1[3], w9
-; CHECK-NEXT: ldr w8, [sp, #368]
-; CHECK-NEXT: mov.b v3[3], w10
-; CHECK-NEXT: ldr w9, [sp, #240]
-; CHECK-NEXT: ldr w10, [sp, #112]
-; CHECK-NEXT: mov.b v2[4], w4
-; CHECK-NEXT: mov.b v0[4], w8
-; CHECK-NEXT: mov.b v1[4], w9
-; CHECK-NEXT: ldr w8, [sp, #376]
-; CHECK-NEXT: mov.b v3[4], w10
-; CHECK-NEXT: ldr w9, [sp, #248]
-; CHECK-NEXT: ldr w10, [sp, #120]
-; CHECK-NEXT: mov.b v2[5], w5
-; CHECK-NEXT: mov.b v0[5], w8
-; CHECK-NEXT: mov.b v1[5], w9
-; CHECK-NEXT: ldr w8, [sp, #384]
-; CHECK-NEXT: mov.b v3[5], w10
-; CHECK-NEXT: ldr w9, [sp, #256]
-; CHECK-NEXT: ldr w10, [sp, #128]
-; CHECK-NEXT: mov.b v2[6], w6
-; CHECK-NEXT: mov.b v0[6], w8
-; CHECK-NEXT: mov.b v1[6], w9
-; CHECK-NEXT: ldr w8, [sp, #392]
-; CHECK-NEXT: mov.b v3[6], w10
-; CHECK-NEXT: ldr w9, [sp, #264]
-; CHECK-NEXT: ldr w10, [sp, #136]
-; CHECK-NEXT: mov.b v2[7], w7
-; CHECK-NEXT: mov.b v0[7], w8
-; CHECK-NEXT: mov.b v1[7], w9
-; CHECK-NEXT: ldr w8, [sp, #16]
-; CHECK-NEXT: mov.b v3[7], w10
-; CHECK-NEXT: ldr w9, [sp, #400]
-; CHECK-NEXT: ldr w10, [sp, #272]
-; CHECK-NEXT: mov.b v2[8], w8
-; CHECK-NEXT: ldr w8, [sp, #24]
-; CHECK-NEXT: mov.b v0[8], w9
-; CHECK-NEXT: mov.b v1[8], w10
-; CHECK-NEXT: ldr w9, [sp, #408]
-; CHECK-NEXT: mov.b v3[8], w11
-; CHECK-NEXT: ldr w10, [sp, #280]
-; CHECK-NEXT: ldr w11, [sp, #152]
-; CHECK-NEXT: mov.b v2[9], w8
-; CHECK-NEXT: ldr w8, [sp, #32]
-; CHECK-NEXT: mov.b v0[9], w9
-; CHECK-NEXT: mov.b v1[9], w10
-; CHECK-NEXT: ldr w9, [sp, #416]
-; CHECK-NEXT: mov.b v3[9], w11
-; CHECK-NEXT: ldr w10, [sp, #288]
-; CHECK-NEXT: ldr w11, [sp, #160]
-; CHECK-NEXT: mov.b v2[10], w8
-; CHECK-NEXT: ldr w8, [sp, #40]
-; CHECK-NEXT: mov.b v0[10], w9
-; CHECK-NEXT: mov.b v1[10], w10
-; CHECK-NEXT: ldr w9, [sp, #424]
-; CHECK-NEXT: mov.b v3[10], w11
-; CHECK-NEXT: ldr w10, [sp, #296]
-; CHECK-NEXT: ldr w11, [sp, #168]
-; CHECK-NEXT: mov.b v2[11], w8
-; CHECK-NEXT: ldr w8, [sp, #48]
-; CHECK-NEXT: mov.b v0[11], w9
-; CHECK-NEXT: mov.b v1[11], w10
-; CHECK-NEXT: ldr w9, [sp, #432]
-; CHECK-NEXT: mov.b v3[11], w11
-; CHECK-NEXT: ldr w10, [sp, #304]
-; CHECK-NEXT: ldr w11, [sp, #176]
-; CHECK-NEXT: mov.b v2[12], w8
-; CHECK-NEXT: ldr w8, [sp, #56]
-; CHECK-NEXT: mov.b v0[12], w9
-; CHECK-NEXT: mov.b v1[12], w10
-; CHECK-NEXT: ldr w9, [sp, #440]
-; CHECK-NEXT: mov.b v3[12], w11
-; CHECK-NEXT: ldr w10, [sp, #312]
-; CHECK-NEXT: ldr w11, [sp, #184]
-; CHECK-NEXT: mov.b v2[13], w8
-; CHECK-NEXT: ldr w8, [sp, #64]
-; CHECK-NEXT: mov.b v0[13], w9
-; CHECK-NEXT: mov.b v1[13], w10
-; CHECK-NEXT: ldr w9, [sp, #448]
-; CHECK-NEXT: mov.b v3[13], w11
-; CHECK-NEXT: ldr w10, [sp, #320]
-; CHECK-NEXT: ldr w11, [sp, #192]
-; CHECK-NEXT: mov.b v2[14], w8
-; CHECK-NEXT: ldr w8, [sp, #72]
-; CHECK-NEXT: mov.b v0[14], w9
-; CHECK-NEXT: mov.b v1[14], w10
-; CHECK-NEXT: ldr w9, [sp, #456]
-; CHECK-NEXT: mov.b v3[14], w11
-; CHECK-NEXT: ldr w10, [sp, #328]
-; CHECK-NEXT: ldr w11, [sp, #200]
-; CHECK-NEXT: mov.b v2[15], w8
-; CHECK-NEXT: mov.b v0[15], w9
-; CHECK-NEXT: mov.b v1[15], w10
-; CHECK-NEXT: mov.b v3[15], w11
-; CHECK-NEXT: shl.16b v2, v2, #7
-; CHECK-NEXT: shl.16b v4, v1, #7
-; CHECK-NEXT: shl.16b v5, v0, #7
-; CHECK-NEXT: shl.16b v3, v3, #7
-; CHECK-NEXT: cmlt.16b v0, v2, #0
-; CHECK-NEXT: cmlt.16b v2, v4, #0
-; CHECK-NEXT: cmlt.16b v1, v3, #0
-; CHECK-NEXT: cmlt.16b v3, v5, #0
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v64i1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: ldr w8, [sp, #336]
+; CHECK-SD-NEXT: ldr w9, [sp, #208]
+; CHECK-SD-NEXT: fmov s2, w0
+; CHECK-SD-NEXT: ldr w10, [sp, #80]
+; CHECK-SD-NEXT: ldr w11, [sp, #216]
+; CHECK-SD-NEXT: ldr w12, [sp, #88]
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: fmov s1, w9
+; CHECK-SD-NEXT: ldr w8, [sp, #344]
+; CHECK-SD-NEXT: fmov s3, w10
+; CHECK-SD-NEXT: mov.b v2[1], w1
+; CHECK-SD-NEXT: ldr w9, [sp, #224]
+; CHECK-SD-NEXT: ldr w10, [sp, #96]
+; CHECK-SD-NEXT: mov.b v0[1], w8
+; CHECK-SD-NEXT: mov.b v1[1], w11
+; CHECK-SD-NEXT: ldr w8, [sp, #352]
+; CHECK-SD-NEXT: mov.b v3[1], w12
+; CHECK-SD-NEXT: ldr w11, [sp, #144]
+; CHECK-SD-NEXT: mov.b v2[2], w2
+; CHECK-SD-NEXT: mov.b v0[2], w8
+; CHECK-SD-NEXT: mov.b v1[2], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #360]
+; CHECK-SD-NEXT: mov.b v3[2], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #232]
+; CHECK-SD-NEXT: ldr w10, [sp, #104]
+; CHECK-SD-NEXT: mov.b v2[3], w3
+; CHECK-SD-NEXT: mov.b v0[3], w8
+; CHECK-SD-NEXT: mov.b v1[3], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #368]
+; CHECK-SD-NEXT: mov.b v3[3], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #240]
+; CHECK-SD-NEXT: ldr w10, [sp, #112]
+; CHECK-SD-NEXT: mov.b v2[4], w4
+; CHECK-SD-NEXT: mov.b v0[4], w8
+; CHECK-SD-NEXT: mov.b v1[4], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #376]
+; CHECK-SD-NEXT: mov.b v3[4], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #248]
+; CHECK-SD-NEXT: ldr w10, [sp, #120]
+; CHECK-SD-NEXT: mov.b v2[5], w5
+; CHECK-SD-NEXT: mov.b v0[5], w8
+; CHECK-SD-NEXT: mov.b v1[5], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #384]
+; CHECK-SD-NEXT: mov.b v3[5], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #256]
+; CHECK-SD-NEXT: ldr w10, [sp, #128]
+; CHECK-SD-NEXT: mov.b v2[6], w6
+; CHECK-SD-NEXT: mov.b v0[6], w8
+; CHECK-SD-NEXT: mov.b v1[6], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #392]
+; CHECK-SD-NEXT: mov.b v3[6], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #264]
+; CHECK-SD-NEXT: ldr w10, [sp, #136]
+; CHECK-SD-NEXT: mov.b v2[7], w7
+; CHECK-SD-NEXT: mov.b v0[7], w8
+; CHECK-SD-NEXT: mov.b v1[7], w9
+; CHECK-SD-NEXT: ldr w8, [sp, #16]
+; CHECK-SD-NEXT: mov.b v3[7], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #400]
+; CHECK-SD-NEXT: ldr w10, [sp, #272]
+; CHECK-SD-NEXT: mov.b v2[8], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #24]
+; CHECK-SD-NEXT: mov.b v0[8], w9
+; CHECK-SD-NEXT: mov.b v1[8], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #408]
+; CHECK-SD-NEXT: mov.b v3[8], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #280]
+; CHECK-SD-NEXT: ldr w11, [sp, #152]
+; CHECK-SD-NEXT: mov.b v2[9], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #32]
+; CHECK-SD-NEXT: mov.b v0[9], w9
+; CHECK-SD-NEXT: mov.b v1[9], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #416]
+; CHECK-SD-NEXT: mov.b v3[9], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #288]
+; CHECK-SD-NEXT: ldr w11, [sp, #160]
+; CHECK-SD-NEXT: mov.b v2[10], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #40]
+; CHECK-SD-NEXT: mov.b v0[10], w9
+; CHECK-SD-NEXT: mov.b v1[10], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #424]
+; CHECK-SD-NEXT: mov.b v3[10], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #296]
+; CHECK-SD-NEXT: ldr w11, [sp, #168]
+; CHECK-SD-NEXT: mov.b v2[11], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #48]
+; CHECK-SD-NEXT: mov.b v0[11], w9
+; CHECK-SD-NEXT: mov.b v1[11], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #432]
+; CHECK-SD-NEXT: mov.b v3[11], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #304]
+; CHECK-SD-NEXT: ldr w11, [sp, #176]
+; CHECK-SD-NEXT: mov.b v2[12], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #56]
+; CHECK-SD-NEXT: mov.b v0[12], w9
+; CHECK-SD-NEXT: mov.b v1[12], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #440]
+; CHECK-SD-NEXT: mov.b v3[12], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #312]
+; CHECK-SD-NEXT: ldr w11, [sp, #184]
+; CHECK-SD-NEXT: mov.b v2[13], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #64]
+; CHECK-SD-NEXT: mov.b v0[13], w9
+; CHECK-SD-NEXT: mov.b v1[13], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #448]
+; CHECK-SD-NEXT: mov.b v3[13], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #320]
+; CHECK-SD-NEXT: ldr w11, [sp, #192]
+; CHECK-SD-NEXT: mov.b v2[14], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #72]
+; CHECK-SD-NEXT: mov.b v0[14], w9
+; CHECK-SD-NEXT: mov.b v1[14], w10
+; CHECK-SD-NEXT: ldr w9, [sp, #456]
+; CHECK-SD-NEXT: mov.b v3[14], w11
+; CHECK-SD-NEXT: ldr w10, [sp, #328]
+; CHECK-SD-NEXT: ldr w11, [sp, #200]
+; CHECK-SD-NEXT: mov.b v2[15], w8
+; CHECK-SD-NEXT: mov.b v0[15], w9
+; CHECK-SD-NEXT: mov.b v1[15], w10
+; CHECK-SD-NEXT: mov.b v3[15], w11
+; CHECK-SD-NEXT: shl.16b v2, v2, #7
+; CHECK-SD-NEXT: shl.16b v4, v1, #7
+; CHECK-SD-NEXT: shl.16b v5, v0, #7
+; CHECK-SD-NEXT: shl.16b v3, v3, #7
+; CHECK-SD-NEXT: cmlt.16b v0, v2, #0
+; CHECK-SD-NEXT: cmlt.16b v2, v4, #0
+; CHECK-SD-NEXT: cmlt.16b v1, v3, #0
+; CHECK-SD-NEXT: cmlt.16b v3, v5, #0
+; CHECK-SD-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v64i1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x29, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: ldr s0, [sp, #32]
+; CHECK-GI-NEXT: ldr s4, [sp, #40]
+; CHECK-GI-NEXT: ldr s2, [sp, #96]
+; CHECK-GI-NEXT: ldr s5, [sp, #104]
+; CHECK-GI-NEXT: ldr s1, [sp, #64]
+; CHECK-GI-NEXT: ldr s23, [sp, #72]
+; CHECK-GI-NEXT: mov.s v0[1], v4[0]
+; CHECK-GI-NEXT: ldr s28, [sp, #200]
+; CHECK-GI-NEXT: ldr s3, [sp, #128]
+; CHECK-GI-NEXT: mov.s v2[1], v5[0]
+; CHECK-GI-NEXT: mov.s v1[1], v23[0]
+; CHECK-GI-NEXT: ldr s5, [sp, #192]
+; CHECK-GI-NEXT: ldr s7, [sp, #136]
+; CHECK-GI-NEXT: ldr s4, [sp, #160]
+; CHECK-GI-NEXT: ldr s24, [sp, #168]
+; CHECK-GI-NEXT: mov.s v5[1], v28[0]
+; CHECK-GI-NEXT: ldr s6, [sp, #48]
+; CHECK-GI-NEXT: ldr s21, [sp, #80]
+; CHECK-GI-NEXT: mov.s v3[1], v7[0]
+; CHECK-GI-NEXT: mov.s v4[1], v24[0]
+; CHECK-GI-NEXT: ldr s16, [sp, #112]
+; CHECK-GI-NEXT: ldr s29, [sp, #208]
+; CHECK-GI-NEXT: mov.s v0[2], v6[0]
+; CHECK-GI-NEXT: mov.s v1[2], v21[0]
+; CHECK-GI-NEXT: ldr s6, [sp, #224]
+; CHECK-GI-NEXT: ldr s30, [sp, #232]
+; CHECK-GI-NEXT: mov.s v2[2], v16[0]
+; CHECK-GI-NEXT: ldr s20, [sp, #144]
+; CHECK-GI-NEXT: ldr s27, [sp, #176]
+; CHECK-GI-NEXT: mov.s v5[2], v29[0]
+; CHECK-GI-NEXT: mov.s v6[1], v30[0]
+; CHECK-GI-NEXT: ldr s18, [sp, #88]
+; CHECK-GI-NEXT: ldr s19, [sp, #120]
+; CHECK-GI-NEXT: ldr s7, [sp, #256]
+; CHECK-GI-NEXT: ldr s31, [sp, #264]
+; CHECK-GI-NEXT: mov.s v3[2], v20[0]
+; CHECK-GI-NEXT: mov.s v4[2], v27[0]
+; CHECK-GI-NEXT: ldr s25, [sp, #216]
+; CHECK-GI-NEXT: ldr s26, [sp, #240]
+; CHECK-GI-NEXT: ldr s17, [sp, #56]
+; CHECK-GI-NEXT: ldr s22, [sp, #152]
+; CHECK-GI-NEXT: mov.s v1[3], v18[0]
+; CHECK-GI-NEXT: ldr s23, [sp, #184]
+; CHECK-GI-NEXT: mov.s v2[3], v19[0]
+; CHECK-GI-NEXT: ldr s18, [sp, #320]
+; CHECK-GI-NEXT: ldr s27, [sp, #328]
+; CHECK-GI-NEXT: mov.s v7[1], v31[0]
+; CHECK-GI-NEXT: ldr s19, [sp, #352]
+; CHECK-GI-NEXT: ldr s29, [sp, #360]
+; CHECK-GI-NEXT: mov.s v5[3], v25[0]
+; CHECK-GI-NEXT: mov.s v6[2], v26[0]
+; CHECK-GI-NEXT: fmov s25, w0
+; CHECK-GI-NEXT: fmov s26, w4
+; CHECK-GI-NEXT: ldr s28, [sp, #272]
+; CHECK-GI-NEXT: mov.s v0[3], v17[0]
+; CHECK-GI-NEXT: ldr s17, [sp, #288]
+; CHECK-GI-NEXT: ldr s8, [sp, #296]
+; CHECK-GI-NEXT: mov.s v3[3], v22[0]
+; CHECK-GI-NEXT: ldr s20, [sp, #384]
+; CHECK-GI-NEXT: mov.s v4[3], v23[0]
+; CHECK-GI-NEXT: ldr s30, [sp, #392]
+; CHECK-GI-NEXT: ldr s22, [sp, #416]
+; CHECK-GI-NEXT: ldr s31, [sp, #424]
+; CHECK-GI-NEXT: ldr s23, [sp, #448]
+; CHECK-GI-NEXT: mov.s v18[1], v27[0]
+; CHECK-GI-NEXT: mov.s v19[1], v29[0]
+; CHECK-GI-NEXT: ldr s27, [sp, #456]
+; CHECK-GI-NEXT: ldr s24, [sp, #336]
+; CHECK-GI-NEXT: mov.s v17[1], v8[0]
+; CHECK-GI-NEXT: mov.s v7[2], v28[0]
+; CHECK-GI-NEXT: mov.s v25[1], w1
+; CHECK-GI-NEXT: mov.s v26[1], w5
+; CHECK-GI-NEXT: mov.s v20[1], v30[0]
+; CHECK-GI-NEXT: ldr s28, [sp, #368]
+; CHECK-GI-NEXT: mov.s v22[1], v31[0]
+; CHECK-GI-NEXT: mov.s v23[1], v27[0]
+; CHECK-GI-NEXT: ldr s9, [sp, #304]
+; CHECK-GI-NEXT: ldr s27, [sp, #400]
+; CHECK-GI-NEXT: mov.s v18[2], v24[0]
+; CHECK-GI-NEXT: ldr s24, [sp, #432]
+; CHECK-GI-NEXT: mov.s v19[2], v28[0]
+; CHECK-GI-NEXT: ldr s28, [sp, #464]
+; CHECK-GI-NEXT: ldr s16, [sp, #248]
+; CHECK-GI-NEXT: ldr s21, [sp, #280]
+; CHECK-GI-NEXT: mov.s v17[2], v9[0]
+; CHECK-GI-NEXT: mov.s v25[2], w2
+; CHECK-GI-NEXT: mov.s v26[2], w6
+; CHECK-GI-NEXT: mov.s v20[2], v27[0]
+; CHECK-GI-NEXT: mov.s v22[2], v24[0]
+; CHECK-GI-NEXT: mov.s v23[2], v28[0]
+; CHECK-GI-NEXT: ldr s29, [sp, #312]
+; CHECK-GI-NEXT: ldr s27, [sp, #344]
+; CHECK-GI-NEXT: ldr s24, [sp, #376]
+; CHECK-GI-NEXT: ldr s28, [sp, #408]
+; CHECK-GI-NEXT: mov.s v6[3], v16[0]
+; CHECK-GI-NEXT: ldr s16, [sp, #440]
+; CHECK-GI-NEXT: mov.s v7[3], v21[0]
+; CHECK-GI-NEXT: ldr s21, [sp, #472]
+; CHECK-GI-NEXT: mov.s v25[3], w3
+; CHECK-GI-NEXT: mov.s v26[3], w7
+; CHECK-GI-NEXT: mov.s v17[3], v29[0]
+; CHECK-GI-NEXT: mov.s v18[3], v27[0]
+; CHECK-GI-NEXT: mov.s v19[3], v24[0]
+; CHECK-GI-NEXT: mov.s v20[3], v28[0]
+; CHECK-GI-NEXT: mov.s v22[3], v16[0]
+; CHECK-GI-NEXT: mov.s v23[3], v21[0]
+; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
+; CHECK-GI-NEXT: uzp1.8h v1, v2, v3
+; CHECK-GI-NEXT: uzp1.8h v2, v4, v5
+; CHECK-GI-NEXT: uzp1.8h v3, v6, v7
+; CHECK-GI-NEXT: ldr x29, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: uzp1.8h v16, v25, v26
+; CHECK-GI-NEXT: uzp1.8h v4, v17, v18
+; CHECK-GI-NEXT: uzp1.8h v5, v19, v20
+; CHECK-GI-NEXT: uzp1.8h v6, v22, v23
+; CHECK-GI-NEXT: uzp1.16b v1, v1, v2
+; CHECK-GI-NEXT: uzp1.16b v0, v16, v0
+; CHECK-GI-NEXT: uzp1.16b v2, v3, v4
+; CHECK-GI-NEXT: uzp1.16b v3, v5, v6
+; CHECK-GI-NEXT: shl.16b v1, v1, #7
+; CHECK-GI-NEXT: shl.16b v0, v0, #7
+; CHECK-GI-NEXT: shl.16b v2, v2, #7
+; CHECK-GI-NEXT: shl.16b v3, v3, #7
+; CHECK-GI-NEXT: sshr.16b v1, v1, #7
+; CHECK-GI-NEXT: sshr.16b v0, v0, #7
+; CHECK-GI-NEXT: sshr.16b v2, v2, #7
+; CHECK-GI-NEXT: sshr.16b v3, v3, #7
+; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
%res = sext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index cff60bdf44ca48..7c71449a316338 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -284,26 +284,18 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-NEXT: usubl2.4s v3, v3, v4
; CHECK-GI-NEXT: usubl.4s v4, v0, v1
; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: neg.4s v6, v5
+; CHECK-GI-NEXT: neg.4s v7, v3
; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: neg.4s v17, v0
+; CHECK-GI-NEXT: cmgt.4s v18, v2, v3
+; CHECK-GI-NEXT: cmgt.4s v19, v2, v4
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
-; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: shl.4s v1, v1, #31
-; CHECK-GI-NEXT: shl.4s v6, v6, #31
-; CHECK-GI-NEXT: shl.4s v7, v7, #31
-; CHECK-GI-NEXT: shl.4s v2, v2, #31
-; CHECK-GI-NEXT: sshr.4s v1, v1, #31
-; CHECK-GI-NEXT: sshr.4s v6, v6, #31
-; CHECK-GI-NEXT: sshr.4s v7, v7, #31
-; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
+; CHECK-GI-NEXT: bsl.16b v1, v6, v5
+; CHECK-GI-NEXT: bit.16b v3, v7, v18
+; CHECK-GI-NEXT: bit.16b v4, v16, v19
+; CHECK-GI-NEXT: bit.16b v0, v17, v2
; CHECK-GI-NEXT: add.4s v1, v1, v3
; CHECK-GI-NEXT: add.4s v0, v4, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
@@ -340,26 +332,18 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4
; CHECK-GI-NEXT: ssubl.4s v4, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: neg.4s v6, v5
+; CHECK-GI-NEXT: neg.4s v7, v3
; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: neg.4s v17, v0
+; CHECK-GI-NEXT: cmgt.4s v18, v2, v3
+; CHECK-GI-NEXT: cmgt.4s v19, v2, v4
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
-; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: shl.4s v1, v1, #31
-; CHECK-GI-NEXT: shl.4s v6, v6, #31
-; CHECK-GI-NEXT: shl.4s v7, v7, #31
-; CHECK-GI-NEXT: shl.4s v2, v2, #31
-; CHECK-GI-NEXT: sshr.4s v1, v1, #31
-; CHECK-GI-NEXT: sshr.4s v6, v6, #31
-; CHECK-GI-NEXT: sshr.4s v7, v7, #31
-; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
+; CHECK-GI-NEXT: bsl.16b v1, v6, v5
+; CHECK-GI-NEXT: bit.16b v3, v7, v18
+; CHECK-GI-NEXT: bit.16b v4, v16, v19
+; CHECK-GI-NEXT: bit.16b v0, v17, v2
; CHECK-GI-NEXT: add.4s v1, v1, v3
; CHECK-GI-NEXT: add.4s v0, v4, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
@@ -397,16 +381,12 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: usubl.4s v3, v1, v2
; CHECK-GI-NEXT: usubl2.4s v1, v1, v2
-; CHECK-GI-NEXT: cmgt.4s v2, v0, v3
+; CHECK-GI-NEXT: neg.4s v2, v3
+; CHECK-GI-NEXT: neg.4s v4, v1
+; CHECK-GI-NEXT: cmgt.4s v5, v0, v3
; CHECK-GI-NEXT: cmgt.4s v0, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v3
-; CHECK-GI-NEXT: neg.4s v5, v1
-; CHECK-GI-NEXT: shl.4s v2, v2, #31
-; CHECK-GI-NEXT: shl.4s v0, v0, #31
-; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: sshr.4s v0, v0, #31
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
+; CHECK-GI-NEXT: bif.16b v2, v3, v5
+; CHECK-GI-NEXT: bsl.16b v0, v4, v1
; CHECK-GI-NEXT: add.4s v0, v2, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -433,19 +413,15 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: sabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v3
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT: neg.4s v1, v3
+; CHECK-GI-NEXT: neg.4s v4, v0
+; CHECK-GI-NEXT: cmgt.4s v5, v2, v3
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v4, v3
-; CHECK-GI-NEXT: neg.4s v5, v0
-; CHECK-GI-NEXT: shl.4s v1, v1, #31
-; CHECK-GI-NEXT: shl.4s v2, v2, #31
-; CHECK-GI-NEXT: sshr.4s v1, v1, #31
-; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bsl.16b v1, v4, v3
-; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: bif.16b v1, v3, v5
+; CHECK-GI-NEXT: bit.16b v0, v4, v2
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 54ab4af413f99e..82e29d0f8a194f 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -9,7 +9,7 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f32_float
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f32_i32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v7f16_half
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16f16_half
+; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for v16f16_half
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v7f16_i32
; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for v16f16_i32
@@ -184,10 +184,6 @@ define <4 x double> @v4f64_double(<4 x double> %a, <4 x double> %b, <4 x double>
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: fcmgt v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
@@ -305,10 +301,6 @@ define <8 x float> @v8f32_float(<8 x float> %a, <8 x float> %b, <8 x float> %d,
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: fcmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
@@ -367,10 +359,6 @@ define <8 x i32> @v8f32_i32(<8 x float> %a, <8 x float> %b, <8 x i32> %d, <8 x i
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: fcmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
@@ -855,117 +843,134 @@ define <16 x half> @v16f16_half(<16 x half> %a, <16 x half> %b, <16 x half> %d,
;
; CHECK-GI-NOFP16-LABEL: v16f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h16, v3.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h18, v3.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s20, h3
-; CHECK-GI-NOFP16-NEXT: fcvt s21, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: mov h16, v3.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s21, s20
-; CHECK-GI-NOFP16-NEXT: mov h20, v3.h[4]
-; CHECK-GI-NOFP16-NEXT: mov h21, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h16, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h17, v2.h[1]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s19, h2
+; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h21, v2.h[2]
; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: csetm w14, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s19, s18
-; CHECK-GI-NOFP16-NEXT: mov h18, v3.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: fcmp s18, s19
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov h19, v2.h[3]
; CHECK-GI-NOFP16-NEXT: fcvt s20, h20
; CHECK-GI-NOFP16-NEXT: fcvt s21, h21
-; CHECK-GI-NOFP16-NEXT: csetm w13, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: mov h16, v3.h[6]
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: cset w14, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s16, s17
+; CHECK-GI-NOFP16-NEXT: mov h16, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h17, v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[7]
; CHECK-GI-NOFP16-NEXT: fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: csetm w11, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s21, s20
+; CHECK-GI-NOFP16-NEXT: cset w15, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s20, s21
+; CHECK-GI-NOFP16-NEXT: mov h20, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h21, v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: csetm w12, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s19, s18
-; CHECK-GI-NOFP16-NEXT: mov h18, v2.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: csetm w10, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
+; CHECK-GI-NOFP16-NEXT: cset w13, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s18, s19
+; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov h19, v2.h[6]
+; CHECK-GI-NOFP16-NEXT: fcvt s20, h20
+; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-NOFP16-NEXT: fcvt s21, h21
+; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7]
+; CHECK-GI-NOFP16-NEXT: cset w12, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s16, s17
; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: mov h18, v2.h[2]
+; CHECK-GI-NOFP16-NEXT: fcvt s18, h1
; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: csetm w9, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s1, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: csetm w15, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h17, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT: csetm w16, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s3, s1
-; CHECK-GI-NOFP16-NEXT: fmov s1, w14
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s19, h3
+; CHECK-GI-NOFP16-NEXT: cset w11, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s20, s21
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT: mov h20, v3.h[3]
+; CHECK-GI-NOFP16-NEXT: cset w9, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s16, s17
+; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h17, v3.h[1]
+; CHECK-GI-NOFP16-NEXT: cset w8, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s0, s2
+; CHECK-GI-NOFP16-NEXT: mov h0, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h2, v3.h[2]
; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: csetm w14, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s19, s18
-; CHECK-GI-NOFP16-NEXT: mov h18, v2.h[4]
-; CHECK-GI-NOFP16-NEXT: fmov s3, w14
-; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: mov h16, v2.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], w16
-; CHECK-GI-NOFP16-NEXT: mov h17, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], w13
+; CHECK-GI-NOFP16-NEXT: cset w10, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s18, s19
+; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[3]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT: fcvt s19, h2
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[4]
+; CHECK-GI-NOFP16-NEXT: cset w16, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s16, s17
+; CHECK-GI-NOFP16-NEXT: mov h16, v3.h[4]
+; CHECK-GI-NOFP16-NEXT: fcvt s17, h18
+; CHECK-GI-NOFP16-NEXT: fmov s18, w16
+; CHECK-GI-NOFP16-NEXT: cset w17, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s0, s19
+; CHECK-GI-NOFP16-NEXT: fcvt s19, h20
+; CHECK-GI-NOFP16-NEXT: fmov s0, w14
+; CHECK-GI-NOFP16-NEXT: fmov s20, w15
+; CHECK-GI-NOFP16-NEXT: fmov s21, w17
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
+; CHECK-GI-NOFP16-NEXT: cset w14, mi
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v20.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v18.h[1], v21.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h20, v1.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h21, v3.h[5]
+; CHECK-GI-NOFP16-NEXT: fcmp s17, s19
+; CHECK-GI-NOFP16-NEXT: fmov s17, w13
+; CHECK-GI-NOFP16-NEXT: fmov s19, w14
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v17.h[0]
+; CHECK-GI-NOFP16-NEXT: cset w13, mi
+; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v18.h[2], v19.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h19, v3.h[6]
+; CHECK-GI-NOFP16-NEXT: fcmp s2, s16
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h20
+; CHECK-GI-NOFP16-NEXT: fcvt s16, h21
+; CHECK-GI-NOFP16-NEXT: fmov s20, w12
+; CHECK-GI-NOFP16-NEXT: fmov s21, w13
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
+; CHECK-GI-NOFP16-NEXT: mov h3, v3.h[7]
+; CHECK-GI-NOFP16-NEXT: cset w12, mi
; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: fcmp s19, s18
-; CHECK-GI-NOFP16-NEXT: mov h18, v2.h[6]
-; CHECK-GI-NOFP16-NEXT: mov h19, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], w11
-; CHECK-GI-NOFP16-NEXT: mov h2, v2.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], w12
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s17, s16
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], w10
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: fcmp s0, s2
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], w9
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], w8
-; CHECK-GI-NOFP16-NEXT: csetm w8, mi
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], w15
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], w8
-; CHECK-GI-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v20.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s19, h19
+; CHECK-GI-NOFP16-NEXT: mov v18.h[3], v21.h[0]
+; CHECK-GI-NOFP16-NEXT: fcmp s2, s16
+; CHECK-GI-NOFP16-NEXT: fmov s2, w11
+; CHECK-GI-NOFP16-NEXT: fmov s16, w12
+; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: cset w11, mi
+; CHECK-GI-NOFP16-NEXT: fcmp s17, s19
+; CHECK-GI-NOFP16-NEXT: mov v18.h[4], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvt s2, h3
+; CHECK-GI-NOFP16-NEXT: fmov s3, w9
+; CHECK-GI-NOFP16-NEXT: fmov s16, w11
+; CHECK-GI-NOFP16-NEXT: cset w9, mi
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v18.h[5], v16.h[0]
+; CHECK-GI-NOFP16-NEXT: fcmp s1, s2
+; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: fmov s2, w9
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: cset w8, mi
+; CHECK-GI-NOFP16-NEXT: fmov s1, w10
+; CHECK-GI-NOFP16-NEXT: mov v18.h[6], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v18.h[7], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: shl v0.8h, v0.8h, #15
+; CHECK-GI-NOFP16-NEXT: shl v1.8h, v18.8h, #15
+; CHECK-GI-NOFP16-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NOFP16-NEXT: sshr v1.8h, v1.8h, #15
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v16f16_half:
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 8505caa39959ba..d2b44bb5e3f9f1 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -167,10 +167,6 @@ define <8 x i32> @v8i32_i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %d, <8 x i32>
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: cmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index fa5902c65ce05a..fa1ab61a6216f4 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -10,7 +10,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f32
; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3f16
; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for stofp_v2i8_v2f16
define double @stofp_i64_f64(i64 %a) {
; CHECK-LABEL: stofp_i64_f64:
@@ -5563,13 +5562,21 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-FP16-NEXT: fmov w9, s0
-; CHECK-GI-FP16-NEXT: sxtb w9, w9
-; CHECK-GI-FP16-NEXT: sxtb w8, w8
-; CHECK-GI-FP16-NEXT: scvtf h0, w9
-; CHECK-GI-FP16-NEXT: scvtf h1, w8
+; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v0.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v0.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v0.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index b2fc477d8655a4..af839d3b60836d 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -4551,10 +4551,6 @@ define <4 x i64> @fcmoeq4xdouble(<4 x double> %A, <4 x double> %B) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fcmeq v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: fcmeq v1.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
; CHECK-GI-NEXT: ret
%tmp3 = fcmp oeq <4 x double> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i64>
@@ -4572,10 +4568,6 @@ define <8 x i32> @fcmoeq8xfloat(<8 x float> %A, <8 x float> %B) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fcmeq v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: fcmeq v1.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: ret
%tmp3 = fcmp oeq <8 x float> %A, %B
%tmp4 = sext <8 x i1> %tmp3 to <8 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 014e4071a4bf61..dd53780be14c16 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -2,11 +2,8 @@
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for sext_v3i8_v3i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i8_v3i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v16i10_v16i16
+; CHECK-GI: warning: Instruction selection used fallback path for sext_v3i8_v3i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i32
define i16 @sext_i8_to_i16(i8 %a) {
; CHECK-LABEL: sext_i8_to_i16:
@@ -214,14 +211,25 @@ entry:
}
define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) {
-; CHECK-LABEL: sext_v3i8_v3i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: mov v0.h[1], w1
-; CHECK-NEXT: mov v0.h[2], w2
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v3i8_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i8_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: ret
entry:
%c = sext <3 x i8> %a to <3 x i16>
ret <3 x i16> %c
@@ -266,10 +274,9 @@ define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
; CHECK-GI-NEXT: fmov d0, x0
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-GI-NEXT: lsl x8, x2, #56
-; CHECK-GI-NEXT: asr x8, x8, #56
-; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: sxtb x8, w2
; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov v0.d[1], x1
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
; CHECK-GI-NEXT: mov d1, v0.d[1]
@@ -356,14 +363,25 @@ entry:
}
define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) {
-; CHECK-LABEL: sext_v3i10_v3i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: mov v0.h[1], w1
-; CHECK-NEXT: mov v0.h[2], w2
-; CHECK-NEXT: shl v0.4h, v0.4h, #6
-; CHECK-NEXT: sshr v0.4h, v0.4h, #6
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v3i10_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #6
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #6
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v3i10_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: mov v0.s[1], w1
+; CHECK-GI-NEXT: mov v0.s[2], w2
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: shl v0.4h, v0.4h, #6
+; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #6
+; CHECK-GI-NEXT: ret
entry:
%c = sext <3 x i10> %a to <3 x i16>
ret <3 x i16> %c
@@ -408,10 +426,9 @@ define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
; CHECK-GI-NEXT: fmov d0, x0
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-GI-NEXT: lsl x8, x2, #54
-; CHECK-GI-NEXT: asr x8, x8, #54
-; CHECK-GI-NEXT: mov v0.d[1], x1
+; CHECK-GI-NEXT: sbfx x8, x2, #0, #10
; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov v0.d[1], x1
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
; CHECK-GI-NEXT: mov d1, v0.d[1]
@@ -953,37 +970,69 @@ entry:
}
define <16 x i16> @sext_v16i10_v16i16(<16 x i10> %a) {
-; CHECK-LABEL: sext_v16i10_v16i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr w8, [sp]
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: ldr w9, [sp, #8]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldr w8, [sp, #16]
-; CHECK-NEXT: mov v1.h[1], w1
-; CHECK-NEXT: mov v0.h[1], w9
-; CHECK-NEXT: mov v1.h[2], w2
-; CHECK-NEXT: mov v0.h[2], w8
-; CHECK-NEXT: ldr w8, [sp, #24]
-; CHECK-NEXT: mov v1.h[3], w3
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: ldr w8, [sp, #32]
-; CHECK-NEXT: mov v1.h[4], w4
-; CHECK-NEXT: mov v0.h[4], w8
-; CHECK-NEXT: ldr w8, [sp, #40]
-; CHECK-NEXT: mov v1.h[5], w5
-; CHECK-NEXT: mov v0.h[5], w8
-; CHECK-NEXT: ldr w8, [sp, #48]
-; CHECK-NEXT: mov v1.h[6], w6
-; CHECK-NEXT: mov v0.h[6], w8
-; CHECK-NEXT: ldr w8, [sp, #56]
-; CHECK-NEXT: mov v1.h[7], w7
-; CHECK-NEXT: mov v0.h[7], w8
-; CHECK-NEXT: shl v1.8h, v1.8h, #6
-; CHECK-NEXT: shl v2.8h, v0.8h, #6
-; CHECK-NEXT: sshr v0.8h, v1.8h, #6
-; CHECK-NEXT: sshr v1.8h, v2.8h, #6
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v16i10_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w8, [sp]
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: ldr w9, [sp, #8]
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: ldr w8, [sp, #16]
+; CHECK-SD-NEXT: mov v1.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[1], w9
+; CHECK-SD-NEXT: mov v1.h[2], w2
+; CHECK-SD-NEXT: mov v0.h[2], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #24]
+; CHECK-SD-NEXT: mov v1.h[3], w3
+; CHECK-SD-NEXT: mov v0.h[3], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #32]
+; CHECK-SD-NEXT: mov v1.h[4], w4
+; CHECK-SD-NEXT: mov v0.h[4], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #40]
+; CHECK-SD-NEXT: mov v1.h[5], w5
+; CHECK-SD-NEXT: mov v0.h[5], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #48]
+; CHECK-SD-NEXT: mov v1.h[6], w6
+; CHECK-SD-NEXT: mov v0.h[6], w8
+; CHECK-SD-NEXT: ldr w8, [sp, #56]
+; CHECK-SD-NEXT: mov v1.h[7], w7
+; CHECK-SD-NEXT: mov v0.h[7], w8
+; CHECK-SD-NEXT: shl v1.8h, v1.8h, #6
+; CHECK-SD-NEXT: shl v2.8h, v0.8h, #6
+; CHECK-SD-NEXT: sshr v0.8h, v1.8h, #6
+; CHECK-SD-NEXT: sshr v1.8h, v2.8h, #6
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v16i10_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s4, w0
+; CHECK-GI-NEXT: fmov s5, w4
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: ldr s2, [sp, #32]
+; CHECK-GI-NEXT: ldr s3, [sp, #40]
+; CHECK-GI-NEXT: mov v4.s[1], w1
+; CHECK-GI-NEXT: mov v5.s[1], w5
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT: ldr s1, [sp, #16]
+; CHECK-GI-NEXT: ldr s3, [sp, #48]
+; CHECK-GI-NEXT: mov v4.s[2], w2
+; CHECK-GI-NEXT: mov v5.s[2], w6
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[2], v3.s[0]
+; CHECK-GI-NEXT: ldr s1, [sp, #24]
+; CHECK-GI-NEXT: ldr s3, [sp, #56]
+; CHECK-GI-NEXT: mov v4.s[3], w3
+; CHECK-GI-NEXT: mov v5.s[3], w7
+; CHECK-GI-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[3], v3.s[0]
+; CHECK-GI-NEXT: uzp1 v1.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: shl v1.8h, v1.8h, #6
+; CHECK-GI-NEXT: shl v2.8h, v0.8h, #6
+; CHECK-GI-NEXT: sshr v0.8h, v1.8h, #6
+; CHECK-GI-NEXT: sshr v1.8h, v2.8h, #6
+; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i10> %a to <16 x i16>
ret <16 x i16> %c
More information about the llvm-commits mailing list