[llvm] 75b18ba - Revert "[AArch64][GlobalISel] Fold away lowered vector sign-extend of vector compares."

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 4 03:03:12 PDT 2022


Author: Amara Emerson
Date: 2022-10-04T11:03:02+01:00
New Revision: 75b18ba14d07d38f7031f282f0d216fa6153fd81

URL: https://github.com/llvm/llvm-project/commit/75b18ba14d07d38f7031f282f0d216fa6153fd81
DIFF: https://github.com/llvm/llvm-project/commit/75b18ba14d07d38f7031f282f0d216fa6153fd81.diff

LOG: Revert "[AArch64][GlobalISel] Fold away lowered vector sign-extend of vector compares."

This reverts commit dcd02a524befcec71f46ab76d725b2a8f8623d69.

We should instead use the generic combine.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
    llvm/test/CodeGen/AArch64/arm64-vabs.ll
    llvm/test/CodeGen/AArch64/min-max.ll
    llvm/test/CodeGen/AArch64/neon-compare-instructions.ll

Removed: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1518d30bb9bf..fff69211d25a 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -203,13 +203,6 @@ def split_store_zero_128 : GICombineRule<
   (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
 >;
 
-def sext_via_shifts_of_vcmp : GICombineRule<
-  (defs root:$d, register_matchinfo:$info),
-  (match (wip_match_opcode G_ASHR):$d,
-          [{ return matchSextViaShiftsOfVCmp(*${d}, MRI, ${info}); }]),
-  (apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${info}); }])
->;
-
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -236,6 +229,6 @@ def AArch64PostLegalizerCombinerHelper
                         constant_fold, identity_combines,
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
-                        select_to_minmax, sext_via_shifts_of_vcmp]> {
+                        select_to_minmax]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 750bd513bc0e..fbeff1370ef3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -32,9 +32,7 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@@ -331,40 +329,6 @@ static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
   Store.eraseFromParent();
 }
 
-// Match a legalized vector sext of a vector compare. Vector compares always
-// sign-extend the low bit anyway. Unfortunately we have to match the G_SEXT
-// after it's been legalized to shifts since this is after legalization.
-static bool matchSextViaShiftsOfVCmp(MachineInstr &MI, MachineRegisterInfo &MRI,
-                                     Register &MatchInfo) {
-  assert(MI.getOpcode() == TargetOpcode::G_ASHR && "Expected G_ASHR");
-  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-  if (!DstTy.isVector())
-    return false;
-
-  Register ShlLHS;
-  int64_t ShlRHS;
-  if (!mi_match(MI.getOperand(1).getReg(), MRI,
-                m_GShl(m_Reg(ShlLHS), m_ICstOrSplat(ShlRHS))))
-    return false;
-
-  // Check the shift amount is correct for a sext.
-  if (ShlRHS != DstTy.getScalarSizeInBits() - 1)
-    return false;
-  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstSplat(ShlRHS)))
-    return false;
-
-  // Check we're trying to extend a vector compare.
-  if (auto *Cmp = getOpcodeDef<GFCmp>(ShlLHS, MRI)) {
-    MatchInfo = Cmp->getReg(0);
-    return true;
-  }
-  if (auto *Cmp = getOpcodeDef<GICmp>(ShlLHS, MRI)) {
-    MatchInfo = Cmp->getReg(0);
-    return true;
-  }
-  return false;
-}
-
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index 9ca9f1b86598..da06d82f2430 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -57,9 +57,10 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI1_4
-; GISEL-NEXT:    adrp x9, .LCPI1_5
+; GISEL-NEXT:    adrp x9, .LCPI1_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_4]
 ; GISEL-NEXT:    adrp x8, .LCPI1_3
+; GISEL-NEXT:    ldr q5, [x9, :lo12:.LCPI1_0]
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_3]
 ; GISEL-NEXT:    adrp x8, .LCPI1_2
@@ -67,20 +68,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_2]
-; GISEL-NEXT:    adrp x8, .LCPI1_1
+; GISEL-NEXT:    adrp x8, .LCPI1_5
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
 ; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v3.8h, v2.8h
 ; GISEL-NEXT:    umull v2.4s, v3.4h, v2.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_1]
-; GISEL-NEXT:    adrp x8, .LCPI1_0
-; GISEL-NEXT:    neg v3.8h, v3.8h
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_5]
+; GISEL-NEXT:    adrp x8, .LCPI1_1
+; GISEL-NEXT:    cmeq v3.8h, v3.8h, v5.8h
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI1_5]
-; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI1_1]
+; GISEL-NEXT:    shl v3.8h, v3.8h, #15
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT:    cmeq v2.8h, v4.8h, v5.8h
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT:    neg v2.8h, v4.8h
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT:    sshr v2.8h, v3.8h, #15
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
@@ -106,23 +108,25 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI2_3
-; GISEL-NEXT:    adrp x9, .LCPI2_1
+; GISEL-NEXT:    adrp x9, .LCPI2_4
+; GISEL-NEXT:    adrp x10, .LCPI2_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI2_3]
 ; GISEL-NEXT:    adrp x8, .LCPI2_2
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI2_1]
+; GISEL-NEXT:    ldr q4, [x10, :lo12:.LCPI2_0]
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_2]
-; GISEL-NEXT:    adrp x8, .LCPI2_4
+; GISEL-NEXT:    adrp x8, .LCPI2_1
 ; GISEL-NEXT:    ushl v1.8h, v0.8h, v1.8h
-; GISEL-NEXT:    neg v4.8h, v4.8h
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI2_1]
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_4]
-; GISEL-NEXT:    adrp x8, .LCPI2_0
+; GISEL-NEXT:    ldr q2, [x9, :lo12:.LCPI2_4]
+; GISEL-NEXT:    cmeq v2.8h, v2.8h, v4.8h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI2_0]
-; GISEL-NEXT:    cmeq v2.8h, v2.8h, v3.8h
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v4.8h
+; GISEL-NEXT:    neg v3.8h, v5.8h
+; GISEL-NEXT:    shl v2.8h, v2.8h, #15
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT:    sshr v2.8h, v2.8h, #15
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@@ -147,21 +151,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI3_2
-; GISEL-NEXT:    adrp x9, .LCPI3_3
+; GISEL-NEXT:    adrp x9, .LCPI3_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI3_2]
-; GISEL-NEXT:    adrp x8, .LCPI3_1
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI3_3]
+; GISEL-NEXT:    adrp x8, .LCPI3_3
+; GISEL-NEXT:    ldr q3, [x9, :lo12:.LCPI3_0]
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
-; GISEL-NEXT:    adrp x8, .LCPI3_0
-; GISEL-NEXT:    neg v2.8h, v2.8h
-; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
-; GISEL-NEXT:    usra v1.8h, v3.8h, #1
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_0]
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_3]
+; GISEL-NEXT:    adrp x8, .LCPI3_1
+; GISEL-NEXT:    cmeq v2.8h, v2.8h, v3.8h
+; GISEL-NEXT:    sub v4.8h, v0.8h, v1.8h
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT:    shl v2.8h, v2.8h, #15
+; GISEL-NEXT:    usra v1.8h, v4.8h, #1
+; GISEL-NEXT:    neg v3.8h, v3.8h
+; GISEL-NEXT:    sshr v2.8h, v2.8h, #15
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@@ -191,19 +197,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI4_2
-; GISEL-NEXT:    adrp x9, .LCPI4_1
+; GISEL-NEXT:    adrp x9, .LCPI4_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI4_2]
 ; GISEL-NEXT:    adrp x8, .LCPI4_3
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI4_1]
+; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI4_0]
 ; GISEL-NEXT:    umull2 v2.8h, v0.16b, v1.16b
 ; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_3]
 ; GISEL-NEXT:    umull v1.8h, v0.8b, v1.8b
-; GISEL-NEXT:    adrp x8, .LCPI4_0
-; GISEL-NEXT:    neg v4.16b, v4.16b
+; GISEL-NEXT:    adrp x8, .LCPI4_1
+; GISEL-NEXT:    cmeq v3.16b, v3.16b, v4.16b
 ; GISEL-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; GISEL-NEXT:    cmeq v2.16b, v3.16b, v2.16b
-; GISEL-NEXT:    ushl v1.16b, v1.16b, v4.16b
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT:    shl v3.16b, v3.16b, #7
+; GISEL-NEXT:    neg v2.16b, v2.16b
+; GISEL-NEXT:    ushl v1.16b, v1.16b, v2.16b
+; GISEL-NEXT:    sshr v2.16b, v3.16b, #7
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -240,26 +248,28 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
 ; GISEL-LABEL: pr38477:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    adrp x8, .LCPI5_3
-; GISEL-NEXT:    adrp x9, .LCPI5_4
+; GISEL-NEXT:    adrp x9, .LCPI5_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI5_3]
 ; GISEL-NEXT:    adrp x8, .LCPI5_2
+; GISEL-NEXT:    ldr q5, [x9, :lo12:.LCPI5_0]
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI5_2]
-; GISEL-NEXT:    adrp x8, .LCPI5_1
+; GISEL-NEXT:    adrp x8, .LCPI5_4
 ; GISEL-NEXT:    sub v3.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v3.8h, v2.8h
 ; GISEL-NEXT:    umull v2.4s, v3.4h, v2.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_1]
-; GISEL-NEXT:    adrp x8, .LCPI5_0
-; GISEL-NEXT:    neg v3.8h, v3.8h
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_4]
+; GISEL-NEXT:    adrp x8, .LCPI5_1
+; GISEL-NEXT:    cmeq v3.8h, v3.8h, v5.8h
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI5_4]
-; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI5_0]
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT:    shl v3.8h, v3.8h, #15
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT:    cmeq v2.8h, v4.8h, v5.8h
-; GISEL-NEXT:    ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT:    neg v2.8h, v4.8h
+; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT:    sshr v2.8h, v3.8h, #15
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir
deleted file mode 100644
index 0c58d2c6aa7b..000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir
+++ /dev/null
@@ -1,146 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
----
-name:            sext_of_fcmp_v4s32
-legalized:       true
-liveins:
-  - { reg: '$q0' }
-  - { reg: '$x0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-body:             |
-  bb.1:
-    liveins: $q0, $x0, $x1, $x2
-
-    ; CHECK-LABEL: name: sext_of_fcmp_v4s32
-    ; CHECK: liveins: $q0, $x0, $x1, $x2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
-    ; CHECK-NEXT: $q0 = COPY [[FCMP]](<4 x s32>)
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
-    %0:_(<4 x s32>) = COPY $q0
-    %6:_(s32) = G_FCONSTANT float 6.000000e+00
-    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
-    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
-    %19:_(s32) = G_CONSTANT i32 31
-    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
-    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
-    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
-    $q0 = COPY %11(<4 x s32>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            sext_of_icmp_v4s32
-legalized:       true
-liveins:
-  - { reg: '$q0' }
-  - { reg: '$x0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-body:             |
-  bb.1:
-    liveins: $q0, $x0, $x1, $x2
-
-    ; CHECK-LABEL: name: sext_of_icmp_v4s32
-    ; CHECK: liveins: $q0, $x0, $x1, $x2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
-    ; CHECK-NEXT: $q0 = COPY [[ICMP]](<4 x s32>)
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
-    %0:_(<4 x s32>) = COPY $q0
-    %6:_(s32) = G_CONSTANT i32 42
-    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
-    %17:_(<4 x s32>) = G_ICMP intpred(eq), %0(<4 x s32>), %5
-    %19:_(s32) = G_CONSTANT i32 31
-    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
-    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
-    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
-    $q0 = COPY %11(<4 x s32>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            sext_of_fcmp_wrong_shift
-legalized:       true
-liveins:
-  - { reg: '$q0' }
-  - { reg: '$x0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-body:             |
-  bb.1:
-    liveins: $q0, $x0, $x1, $x2
-
-    ; CHECK-LABEL: name: sext_of_fcmp_wrong_shift
-    ; CHECK: liveins: $q0, $x0, $x1, $x2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>)
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<4 x s32>)
-    ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>)
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
-    %0:_(<4 x s32>) = COPY $q0
-    %6:_(s32) = G_FCONSTANT float 6.000000e+00
-    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
-    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
-    %19:_(s32) = G_CONSTANT i32 29
-    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
-    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
-    %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>)
-    $q0 = COPY %11(<4 x s32>)
-    RET_ReallyLR implicit $q0
-
-...
----
-name:            sext_of_fcmp_mismatch_shift
-legalized:       true
-liveins:
-  - { reg: '$q0' }
-  - { reg: '$x0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-body:             |
-  bb.1:
-    liveins: $q0, $x0, $x1, $x2
-
-    ; CHECK-LABEL: name: sext_of_fcmp_mismatch_shift
-    ; CHECK: liveins: $q0, $x0, $x1, $x2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 31
-    ; CHECK-NEXT: %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>)
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], %cstv2(<4 x s32>)
-    ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>)
-    ; CHECK-NEXT: RET_ReallyLR implicit $q0
-    %0:_(<4 x s32>) = COPY $q0
-    %6:_(s32) = G_FCONSTANT float 6.000000e+00
-    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
-    %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5
-    %19:_(s32) = G_CONSTANT i32 29
-    %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32)
-    %cst2:_(s32) = G_CONSTANT i32 31
-    %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32)
-    %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>)
-    %11:_(<4 x s32>) = G_ASHR %18, %cstv2(<4 x s32>)
-    $q0 = COPY %11(<4 x s32>)
-    RET_ReallyLR implicit $q0
-
-...

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index f7e8d2e0e01b..cbe9dea42e14 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -305,6 +305,8 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
 ; GISEL-NEXT:    usubl.4s v0, v0, v1
 ; GISEL-NEXT:    cmgt.4s v1, v2, v0
 ; GISEL-NEXT:    neg.4s v2, v0
+; GISEL-NEXT:    shl.4s v1, v1, #31
+; GISEL-NEXT:    sshr.4s v1, v1, #31
 ; GISEL-NEXT:    bit.16b v0, v2, v1
 ; GISEL-NEXT:    addv.4s s0, v0
 ; GISEL-NEXT:    fmov w0, s0
@@ -376,6 +378,8 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
 ; GISEL-NEXT:    usubl.2d v0, v0, v1
 ; GISEL-NEXT:    cmgt.2d v1, v2, v0
 ; GISEL-NEXT:    neg.2d v2, v0
+; GISEL-NEXT:    shl.2d v1, v1, #63
+; GISEL-NEXT:    sshr.2d v1, v1, #63
 ; GISEL-NEXT:    bit.16b v0, v2, v1
 ; GISEL-NEXT:    addp.2d d0, v0
 ; GISEL-NEXT:    fmov x0, d0
@@ -1571,6 +1575,8 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.2s v2, v0
 ; GISEL-NEXT:    cmge.2s v1, v0, v1
+; GISEL-NEXT:    shl.2s v1, v1, #31
+; GISEL-NEXT:    sshr.2s v1, v1, #31
 ; GISEL-NEXT:    bif.8b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1591,6 +1597,8 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.4h v2, v0
 ; GISEL-NEXT:    cmgt.4h v1, v0, v1
+; GISEL-NEXT:    shl.4h v1, v1, #15
+; GISEL-NEXT:    sshr.4h v1, v1, #15
 ; GISEL-NEXT:    bif.8b v0, v2, v1
 ; GISEL-NEXT:    ret
 ; For GlobalISel, this generates terrible code until we can pattern match this to abs.
@@ -1612,6 +1620,8 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.8b v2, v0
 ; GISEL-NEXT:    cmgt.8b v1, v1, v0
+; GISEL-NEXT:    shl.8b v1, v1, #7
+; GISEL-NEXT:    sshr.8b v1, v1, #7
 ; GISEL-NEXT:    bit.8b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1632,6 +1642,8 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.4s v2, v0
 ; GISEL-NEXT:    cmge.4s v1, v0, v1
+; GISEL-NEXT:    shl.4s v1, v1, #31
+; GISEL-NEXT:    sshr.4s v1, v1, #31
 ; GISEL-NEXT:    bif.16b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1652,6 +1664,8 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.8h v2, v0
 ; GISEL-NEXT:    cmgt.8h v1, v0, v1
+; GISEL-NEXT:    shl.8h v1, v1, #15
+; GISEL-NEXT:    sshr.8h v1, v1, #15
 ; GISEL-NEXT:    bif.16b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1672,6 +1686,8 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.16b v2, v0
 ; GISEL-NEXT:    cmgt.16b v1, v1, v0
+; GISEL-NEXT:    shl.16b v1, v1, #7
+; GISEL-NEXT:    sshr.16b v1, v1, #7
 ; GISEL-NEXT:    bit.16b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1692,6 +1708,8 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
 ; GISEL-NEXT:    movi.2d v1, #0000000000000000
 ; GISEL-NEXT:    neg.2d v2, v0
 ; GISEL-NEXT:    cmge.2d v1, v1, v0
+; GISEL-NEXT:    shl.2d v1, v1, #63
+; GISEL-NEXT:    sshr.2d v1, v1, #63
 ; GISEL-NEXT:    bit.16b v0, v2, v1
 ; GISEL-NEXT:    ret
 
@@ -1713,6 +1731,8 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
 ; GISEL-NEXT:    ssubl.2d v0, v0, v1
 ; GISEL-NEXT:    cmgt.2d v1, v2, v0
 ; GISEL-NEXT:    neg.2d v2, v0
+; GISEL-NEXT:    shl.2d v1, v1, #63
+; GISEL-NEXT:    sshr.2d v1, v1, #63
 ; GISEL-NEXT:    bit.16b v0, v2, v1
 ; GISEL-NEXT:    ret
   %aext = sext <2 x i32> %a to <2 x i64>
@@ -1762,5 +1782,3 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
   %absel = select <2 x i1> %abcmp, <2 x i128> %ababs, <2 x i128> %ab
   ret <2 x i128> %absel
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; FALLBACK: {{.*}}

diff  --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll
index 2cec030cad22..28311b45013e 100644
--- a/llvm/test/CodeGen/AArch64/min-max.ll
+++ b/llvm/test/CodeGen/AArch64/min-max.ll
@@ -224,11 +224,19 @@ define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
 declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
 
 define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: smax2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-ISEL-LABEL: smax2i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: smax2i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    cmgt v2.2d, v0.2d, v1.2d
+; CHECK-GLOBAL-NEXT:    shl v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GLOBAL-NEXT:    ret
   %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %c
 }
@@ -249,6 +257,10 @@ define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
 ; CHECK-GLOBAL:       // %bb.0:
 ; CHECK-GLOBAL-NEXT:    cmgt v4.2d, v0.2d, v2.2d
 ; CHECK-GLOBAL-NEXT:    cmgt v5.2d, v1.2d, v3.2d
+; CHECK-GLOBAL-NEXT:    shl v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    shl v5.2d, v5.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v5.2d, v5.2d, #63
 ; CHECK-GLOBAL-NEXT:    bif v0.16b, v2.16b, v4.16b
 ; CHECK-GLOBAL-NEXT:    bif v1.16b, v3.16b, v5.16b
 ; CHECK-GLOBAL-NEXT:    stp q0, q1, [x0]
@@ -476,11 +488,19 @@ define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
 declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
 
 define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: umax2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhi v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-ISEL-LABEL: umax2i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    cmhi v2.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: umax2i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    cmhi v2.2d, v0.2d, v1.2d
+; CHECK-GLOBAL-NEXT:    shl v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GLOBAL-NEXT:    ret
   %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %c
 }
@@ -501,6 +521,10 @@ define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
 ; CHECK-GLOBAL:       // %bb.0:
 ; CHECK-GLOBAL-NEXT:    cmhi v4.2d, v0.2d, v2.2d
 ; CHECK-GLOBAL-NEXT:    cmhi v5.2d, v1.2d, v3.2d
+; CHECK-GLOBAL-NEXT:    shl v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    shl v5.2d, v5.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v5.2d, v5.2d, #63
 ; CHECK-GLOBAL-NEXT:    bif v0.16b, v2.16b, v4.16b
 ; CHECK-GLOBAL-NEXT:    bif v1.16b, v3.16b, v5.16b
 ; CHECK-GLOBAL-NEXT:    stp q0, q1, [x0]
@@ -728,11 +752,19 @@ define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
 declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
 
 define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: smin2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmgt v2.2d, v1.2d, v0.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-ISEL-LABEL: smin2i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-ISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: smin2i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    cmgt v2.2d, v1.2d, v0.2d
+; CHECK-GLOBAL-NEXT:    shl v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GLOBAL-NEXT:    ret
   %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %c
 }
@@ -753,6 +785,10 @@ define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
 ; CHECK-GLOBAL:       // %bb.0:
 ; CHECK-GLOBAL-NEXT:    cmgt v4.2d, v2.2d, v0.2d
 ; CHECK-GLOBAL-NEXT:    cmgt v5.2d, v3.2d, v1.2d
+; CHECK-GLOBAL-NEXT:    shl v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    shl v5.2d, v5.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v5.2d, v5.2d, #63
 ; CHECK-GLOBAL-NEXT:    bif v0.16b, v2.16b, v4.16b
 ; CHECK-GLOBAL-NEXT:    bif v1.16b, v3.16b, v5.16b
 ; CHECK-GLOBAL-NEXT:    stp q0, q1, [x0]
@@ -980,11 +1016,19 @@ define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
 declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
 
 define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: umin2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmhi v2.2d, v1.2d, v0.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-ISEL-LABEL: umin2i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-ISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: umin2i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    cmhi v2.2d, v1.2d, v0.2d
+; CHECK-GLOBAL-NEXT:    shl v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v2.2d, v2.2d, #63
+; CHECK-GLOBAL-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-GLOBAL-NEXT:    ret
   %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %c
 }
@@ -1005,6 +1049,10 @@ define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
 ; CHECK-GLOBAL:       // %bb.0:
 ; CHECK-GLOBAL-NEXT:    cmhi v4.2d, v2.2d, v0.2d
 ; CHECK-GLOBAL-NEXT:    cmhi v5.2d, v3.2d, v1.2d
+; CHECK-GLOBAL-NEXT:    shl v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    shl v5.2d, v5.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v4.2d, v4.2d, #63
+; CHECK-GLOBAL-NEXT:    sshr v5.2d, v5.2d, #63
 ; CHECK-GLOBAL-NEXT:    bif v0.16b, v2.16b, v4.16b
 ; CHECK-GLOBAL-NEXT:    bif v1.16b, v3.16b, v5.16b
 ; CHECK-GLOBAL-NEXT:    stp q0, q1, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 9b2d8bea0e4c..ec210b4efc7b 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -11,6 +11,8 @@ define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmeq8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -26,6 +28,8 @@ define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmeq16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -41,6 +45,8 @@ define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmeq4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -56,6 +62,8 @@ define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmeq8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -71,6 +79,8 @@ define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmeq2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -86,6 +96,8 @@ define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmeq4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -101,6 +113,8 @@ define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmeq2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -118,6 +132,8 @@ define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -135,6 +151,8 @@ define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -152,6 +170,8 @@ define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.4h, v0.4h, v1.4h
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -169,6 +189,8 @@ define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -186,6 +208,8 @@ define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -203,6 +227,8 @@ define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -220,6 +246,8 @@ define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmeq v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -235,6 +263,8 @@ define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmgt8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -250,6 +280,8 @@ define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmgt16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -265,6 +297,8 @@ define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmgt4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -280,6 +314,8 @@ define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmgt8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -295,6 +331,8 @@ define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmgt2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -310,6 +348,8 @@ define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmgt4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -325,6 +365,8 @@ define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmgt2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -341,6 +383,8 @@ define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmlt8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -357,6 +401,8 @@ define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmlt16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -373,6 +419,8 @@ define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmlt4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -389,6 +437,8 @@ define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmlt8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -405,6 +455,8 @@ define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmlt2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -421,6 +473,8 @@ define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmlt4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -437,6 +491,8 @@ define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmlt2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -452,6 +508,8 @@ define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmge8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -467,6 +525,8 @@ define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmge16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -482,6 +542,8 @@ define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmge4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -497,6 +559,8 @@ define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmge8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -512,6 +576,8 @@ define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmge2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -527,6 +593,8 @@ define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmge4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -542,6 +610,8 @@ define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmge2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -558,6 +628,8 @@ define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmle8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -574,6 +646,8 @@ define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmle16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -590,6 +664,8 @@ define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmle4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -606,6 +682,8 @@ define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmle8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -622,6 +700,8 @@ define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmle2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -638,6 +718,8 @@ define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmle4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -654,6 +736,8 @@ define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmle2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -669,6 +753,8 @@ define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmhi8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -684,6 +770,8 @@ define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmhi16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -699,6 +787,8 @@ define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmhi4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -714,6 +804,8 @@ define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmhi8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -729,6 +821,8 @@ define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmhi2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -744,6 +838,8 @@ define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmhi4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -759,6 +855,8 @@ define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmhi2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -775,6 +873,8 @@ define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmlo8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -791,6 +891,8 @@ define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmlo16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -807,6 +909,8 @@ define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmlo4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -823,6 +927,8 @@ define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmlo8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -839,6 +945,8 @@ define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmlo2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -855,6 +963,8 @@ define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmlo4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -871,6 +981,8 @@ define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmlo2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhi v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -886,6 +998,8 @@ define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmhs8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -901,6 +1015,8 @@ define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmhs16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -916,6 +1032,8 @@ define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmhs4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -931,6 +1049,8 @@ define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmhs8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -946,6 +1066,8 @@ define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmhs2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -961,6 +1083,8 @@ define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmhs4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -976,6 +1100,8 @@ define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmhs2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -992,6 +1118,8 @@ define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-LABEL: cmls8xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <8 x i8> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1008,6 +1136,8 @@ define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-LABEL: cmls16xi8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <16 x i8> %A, %B;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1024,6 +1154,8 @@ define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-LABEL: cmls4xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <4 x i16> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1040,6 +1172,8 @@ define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-LABEL: cmls8xi16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <8 x i16> %A, %B;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1056,6 +1190,8 @@ define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-LABEL: cmls2xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <2 x i32> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1072,6 +1208,8 @@ define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-LABEL: cmls4xi32:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <4 x i32> %A, %B;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1088,6 +1226,8 @@ define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-LABEL: cmls2xi64:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    cmhs v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <2 x i64> %A, %B;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1106,6 +1246,8 @@ define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    cmeq v0.8b, v0.8b, v2.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = and <8 x i8> %A, %B
   %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
@@ -1125,6 +1267,8 @@ define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    cmeq v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = and <16 x i8> %A, %B
   %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
@@ -1144,6 +1288,8 @@ define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    cmeq v0.4h, v0.4h, v2.4h
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = and <4 x i16> %A, %B
   %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
@@ -1163,6 +1309,8 @@ define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    cmeq v0.8h, v0.8h, v2.8h
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = and <8 x i16> %A, %B
   %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
@@ -1182,6 +1330,8 @@ define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    cmeq v0.2s, v0.2s, v2.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = and <2 x i32> %A, %B
   %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
@@ -1201,6 +1351,8 @@ define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v2.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = and <4 x i32> %A, %B
   %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
@@ -1220,6 +1372,8 @@ define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    cmeq v0.2d, v0.2d, v2.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = and <2 x i64> %A, %B
   %tmp4 = icmp ne <2 x i64> %tmp3, zeroinitializer
@@ -1239,6 +1393,8 @@ define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1255,6 +1411,8 @@ define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1271,6 +1429,8 @@ define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1287,6 +1447,8 @@ define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1303,6 +1465,8 @@ define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1319,6 +1483,8 @@ define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1335,6 +1501,8 @@ define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp eq <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1352,6 +1520,8 @@ define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1368,6 +1538,8 @@ define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1384,6 +1556,8 @@ define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1400,6 +1574,8 @@ define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1416,6 +1592,8 @@ define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1432,6 +1610,8 @@ define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1448,6 +1628,8 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sge <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1578,6 +1760,8 @@ define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1594,6 +1778,8 @@ define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1610,6 +1796,8 @@ define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1626,6 +1814,8 @@ define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1642,6 +1832,8 @@ define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1658,6 +1850,8 @@ define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1674,6 +1868,8 @@ define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sgt <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1690,6 +1886,8 @@ define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1706,6 +1904,8 @@ define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1722,6 +1922,8 @@ define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1738,6 +1940,8 @@ define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1754,6 +1958,8 @@ define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1770,6 +1976,8 @@ define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1786,6 +1994,8 @@ define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp sle <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1802,6 +2012,8 @@ define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1818,6 +2030,8 @@ define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1834,6 +2048,8 @@ define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1850,6 +2066,8 @@ define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1866,6 +2084,8 @@ define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -1882,6 +2102,8 @@ define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -1898,6 +2120,8 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp slt <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1915,6 +2139,8 @@ define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -1932,6 +2158,8 @@ define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -1949,6 +2177,8 @@ define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.4h, v0.4h, v1.4h
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -1966,6 +2196,8 @@ define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -1983,6 +2215,8 @@ define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2000,6 +2234,8 @@ define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2017,6 +2253,8 @@ define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmeq v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2035,6 +2273,8 @@ define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI126_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI126_0]
 ; GISEL-NEXT:    cmhs v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -2053,6 +2293,8 @@ define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI127_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI127_0]
 ; GISEL-NEXT:    cmhs v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -2071,6 +2313,8 @@ define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI128_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI128_0]
 ; GISEL-NEXT:    cmhs v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -2089,6 +2333,8 @@ define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI129_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI129_0]
 ; GISEL-NEXT:    cmhs v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -2107,6 +2353,8 @@ define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI130_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI130_0]
 ; GISEL-NEXT:    cmhs v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2125,6 +2373,8 @@ define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI131_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI131_0]
 ; GISEL-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2144,6 +2394,8 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI132_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI132_0]
 ; GISEL-NEXT:    cmhs v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2163,6 +2415,8 @@ define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI133_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI133_0]
 ; GISEL-NEXT:    cmhi v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -2181,6 +2435,8 @@ define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI134_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI134_0]
 ; GISEL-NEXT:    cmhi v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -2199,6 +2455,8 @@ define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI135_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI135_0]
 ; GISEL-NEXT:    cmhi v0.4h, v0.4h, v1.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -2217,6 +2475,8 @@ define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI136_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI136_0]
 ; GISEL-NEXT:    cmhi v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -2235,6 +2495,8 @@ define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI137_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI137_0]
 ; GISEL-NEXT:    cmhi v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2253,6 +2515,8 @@ define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI138_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI138_0]
 ; GISEL-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2272,6 +2536,8 @@ define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI139_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI139_0]
 ; GISEL-NEXT:    cmhi v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2290,6 +2556,8 @@ define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -2308,6 +2576,8 @@ define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -2326,6 +2596,8 @@ define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -2344,6 +2616,8 @@ define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -2362,6 +2636,8 @@ define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2380,6 +2656,8 @@ define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2398,6 +2676,8 @@ define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    movi v1.2d, #0000000000000000
 ; GISEL-NEXT:    cmhs v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2417,6 +2697,8 @@ define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI147_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI147_0]
 ; GISEL-NEXT:    cmhi v0.8b, v1.8b, v0.8b
+; GISEL-NEXT:    shl v0.8b, v0.8b, #7
+; GISEL-NEXT:    sshr v0.8b, v0.8b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
@@ -2436,6 +2718,8 @@ define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI148_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI148_0]
 ; GISEL-NEXT:    cmhi v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    shl v0.16b, v0.16b, #7
+; GISEL-NEXT:    sshr v0.16b, v0.16b, #7
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
@@ -2455,6 +2739,8 @@ define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI149_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI149_0]
 ; GISEL-NEXT:    cmhi v0.4h, v1.4h, v0.4h
+; GISEL-NEXT:    shl v0.4h, v0.4h, #15
+; GISEL-NEXT:    sshr v0.4h, v0.4h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
@@ -2474,6 +2760,8 @@ define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI150_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI150_0]
 ; GISEL-NEXT:    cmhi v0.8h, v1.8h, v0.8h
+; GISEL-NEXT:    shl v0.8h, v0.8h, #15
+; GISEL-NEXT:    sshr v0.8h, v0.8h, #15
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
@@ -2493,6 +2781,8 @@ define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI151_0
 ; GISEL-NEXT:    ldr d1, [x8, :lo12:.LCPI151_0]
 ; GISEL-NEXT:    cmhi v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2512,6 +2802,8 @@ define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI152_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI152_0]
 ; GISEL-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2532,6 +2824,8 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
 ; GISEL-NEXT:    adrp x8, .LCPI153_0
 ; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI153_0]
 ; GISEL-NEXT:    cmhi v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2547,6 +2841,8 @@ define <2 x i32> @fcmoeq2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmoeq2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2562,6 +2858,8 @@ define <4 x i32> @fcmoeq4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmoeq4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2576,6 +2874,8 @@ define <2 x i64> @fcmoeq2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmoeq2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2591,6 +2891,8 @@ define <2 x i32> @fcmoge2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmoge2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2606,6 +2908,8 @@ define <4 x i32> @fcmoge4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmoge4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2620,6 +2924,8 @@ define <2 x i64> @fcmoge2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmoge2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2635,6 +2941,8 @@ define <2 x i32> @fcmogt2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmogt2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2650,6 +2958,8 @@ define <4 x i32> @fcmogt4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmogt4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2664,6 +2974,8 @@ define <2 x i64> @fcmogt2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmogt2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2680,6 +2992,8 @@ define <2 x i32> @fcmole2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmole2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2696,6 +3010,8 @@ define <4 x i32> @fcmole4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmole4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2712,6 +3028,8 @@ define <2 x i64> @fcmole2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmole2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2728,6 +3046,8 @@ define <2 x i32> @fcmolt2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmolt2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2744,6 +3064,8 @@ define <4 x i32> @fcmolt4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmolt4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2760,6 +3082,8 @@ define <2 x i64> @fcmolt2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmolt2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2780,6 +3104,8 @@ define <2 x i32> @fcmone2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2800,6 +3126,8 @@ define <4 x i32> @fcmone4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2821,6 +3149,8 @@ define <2 x i64> @fcmone2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2841,6 +3171,8 @@ define <2 x i32> @fcmord2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmge v2.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2861,6 +3193,8 @@ define <4 x i32> @fcmord4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmge v2.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2881,6 +3215,8 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmge v2.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2904,6 +3240,8 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2926,6 +3264,8 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2948,6 +3288,8 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2970,6 +3312,8 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2992,6 +3336,8 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3014,6 +3360,8 @@ define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3032,6 +3380,8 @@ define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3050,6 +3400,8 @@ define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3068,6 +3420,8 @@ define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3086,6 +3440,8 @@ define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3104,6 +3460,8 @@ define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3121,6 +3479,8 @@ define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3139,6 +3499,8 @@ define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3157,6 +3519,8 @@ define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3175,6 +3539,8 @@ define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3193,6 +3559,8 @@ define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3211,6 +3579,8 @@ define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3229,6 +3599,8 @@ define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3247,6 +3619,8 @@ define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3265,6 +3639,8 @@ define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3283,6 +3659,8 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3298,6 +3676,8 @@ define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
 ; GISEL-LABEL: fcmoeqz2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3313,6 +3693,8 @@ define <4 x i32> @fcmoeqz4xfloat(<4 x float> %A) {
 ; GISEL-LABEL: fcmoeqz4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3327,6 +3709,8 @@ define <2 x i64> @fcmoeqz2xdouble(<2 x double> %A) {
 ; GISEL-LABEL: fcmoeqz2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oeq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3343,6 +3727,8 @@ define <2 x i32> @fcmogez2xfloat(<2 x float> %A) {
 ; GISEL-LABEL: fcmogez2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3358,6 +3744,8 @@ define <4 x i32> @fcmogez4xfloat(<4 x float> %A) {
 ; GISEL-LABEL: fcmogez4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3372,6 +3760,8 @@ define <2 x i64> @fcmogez2xdouble(<2 x double> %A) {
 ; GISEL-LABEL: fcmogez2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp oge <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3387,6 +3777,8 @@ define <2 x i32> @fcmogtz2xfloat(<2 x float> %A) {
 ; GISEL-LABEL: fcmogtz2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3402,6 +3794,8 @@ define <4 x i32> @fcmogtz4xfloat(<4 x float> %A) {
 ; GISEL-LABEL: fcmogtz4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3416,6 +3810,8 @@ define <2 x i64> @fcmogtz2xdouble(<2 x double> %A) {
 ; GISEL-LABEL: fcmogtz2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ogt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3431,6 +3827,8 @@ define <2 x i32> @fcmoltz2xfloat(<2 x float> %A) {
 ; GISEL-LABEL: fcmoltz2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3446,6 +3844,8 @@ define <4 x i32> @fcmoltz4xfloat(<4 x float> %A) {
 ; GISEL-LABEL: fcmoltz4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3461,6 +3861,8 @@ define <2 x i64> @fcmoltz2xdouble(<2 x double> %A) {
 ; GISEL-LABEL: fcmoltz2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp olt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3476,6 +3878,8 @@ define <2 x i32> @fcmolez2xfloat(<2 x float> %A) {
 ; GISEL-LABEL: fcmolez2xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3491,6 +3895,8 @@ define <4 x i32> @fcmolez4xfloat(<4 x float> %A) {
 ; GISEL-LABEL: fcmolez4xfloat:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3506,6 +3912,8 @@ define <2 x i64> @fcmolez2xdouble(<2 x double> %A) {
 ; GISEL-LABEL: fcmolez2xdouble:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ole <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3526,6 +3934,8 @@ define <2 x i32> @fcmonez2xfloat(<2 x float> %A) {
 ; GISEL-NEXT:    fcmgt v1.2s, v0.2s, #0.0
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3546,6 +3956,8 @@ define <4 x i32> @fcmonez4xfloat(<4 x float> %A) {
 ; GISEL-NEXT:    fcmgt v1.4s, v0.4s, #0.0
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3566,6 +3978,8 @@ define <2 x i64> @fcmonez2xdouble(<2 x double> %A) {
 ; GISEL-NEXT:    fcmgt v1.2d, v0.2d, #0.0
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp one <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3586,6 +4000,8 @@ define <2 x i32> @fcmordz2xfloat(<2 x float> %A) {
 ; GISEL-NEXT:    fcmge v1.2s, v0.2s, #0.0
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3606,6 +4022,8 @@ define <4 x i32> @fcmordz4xfloat(<4 x float> %A) {
 ; GISEL-NEXT:    fcmge v1.4s, v0.4s, #0.0
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3626,6 +4044,8 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
 ; GISEL-NEXT:    fcmge v1.2d, v0.2d, #0.0
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ord <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3648,6 +4068,8 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3670,6 +4092,8 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3692,6 +4116,8 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3710,6 +4136,8 @@ define <2 x i32> @fcmugez2xfloat(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3728,6 +4156,8 @@ define <4 x i32> @fcmugez4xfloat(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3746,6 +4176,8 @@ define <2 x i64> @fcmugez2xdouble(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uge <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3764,6 +4196,8 @@ define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3782,6 +4216,8 @@ define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3800,6 +4236,8 @@ define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ugt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3818,6 +4256,8 @@ define <2 x i32> @fcmultz2xfloat(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3835,6 +4275,8 @@ define <4 x i32> @fcmultz4xfloat(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3852,6 +4294,8 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ult <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3870,6 +4314,8 @@ define <2 x i32> @fcmulez2xfloat(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3888,6 +4334,8 @@ define <4 x i32> @fcmulez4xfloat(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3906,6 +4354,8 @@ define <2 x i64> @fcmulez2xdouble(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp ule <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3924,6 +4374,8 @@ define <2 x i32> @fcmunez2xfloat(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -3942,6 +4394,8 @@ define <4 x i32> @fcmunez4xfloat(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -3960,6 +4414,8 @@ define <2 x i64> @fcmunez2xdouble(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp une <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -3982,6 +4438,8 @@ define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4004,6 +4462,8 @@ define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4026,6 +4486,8 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) {
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp uno <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4042,6 +4504,8 @@ define <2 x i32> @fcmoeq2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmoeq2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4057,6 +4521,8 @@ define <4 x i32> @fcmoeq4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmoeq4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4071,6 +4537,8 @@ define <2 x i64> @fcmoeq2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmoeq2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4086,6 +4554,8 @@ define <2 x i32> @fcmoge2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmoge2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4101,6 +4571,8 @@ define <4 x i32> @fcmoge4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmoge4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4115,6 +4587,8 @@ define <2 x i64> @fcmoge2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmoge2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4130,6 +4604,8 @@ define <2 x i32> @fcmogt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmogt2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4145,6 +4621,8 @@ define <4 x i32> @fcmogt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmogt4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4159,6 +4637,8 @@ define <2 x i64> @fcmogt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmogt2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4174,6 +4654,8 @@ define <2 x i32> @fcmole2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmole2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4189,6 +4671,8 @@ define <4 x i32> @fcmole4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmole4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4204,6 +4688,8 @@ define <2 x i64> @fcmole2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmole2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4219,6 +4705,8 @@ define <2 x i32> @fcmolt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-LABEL: fcmolt2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4234,6 +4722,8 @@ define <4 x i32> @fcmolt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-LABEL: fcmolt4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4249,6 +4739,8 @@ define <2 x i64> @fcmolt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-LABEL: fcmolt2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4267,6 +4759,8 @@ define <2 x i32> @fcmone2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4285,6 +4779,8 @@ define <4 x i32> @fcmone4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4303,6 +4799,8 @@ define <2 x i64> @fcmone2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4322,6 +4820,8 @@ define <2 x i32> @fcmord2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmge v2.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4341,6 +4841,8 @@ define <4 x i32> @fcmord4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmge v2.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4360,6 +4862,8 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmge v2.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4382,6 +4886,8 @@ define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4403,6 +4909,8 @@ define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4424,6 +4932,8 @@ define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4442,6 +4952,8 @@ define <2 x i32> @fcmueq2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4460,6 +4972,8 @@ define <4 x i32> @fcmueq4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4478,6 +4992,8 @@ define <2 x i64> @fcmueq2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4494,6 +5010,8 @@ define <2 x i32> @fcmuge2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4510,6 +5028,8 @@ define <4 x i32> @fcmuge4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4526,6 +5046,8 @@ define <2 x i64> @fcmuge2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4542,6 +5064,8 @@ define <2 x i32> @fcmugt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4558,6 +5082,8 @@ define <4 x i32> @fcmugt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4574,6 +5100,8 @@ define <2 x i64> @fcmugt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4590,6 +5118,8 @@ define <2 x i32> @fcmule2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4606,6 +5136,8 @@ define <4 x i32> @fcmule4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4622,6 +5154,8 @@ define <2 x i64> @fcmule2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4638,6 +5172,8 @@ define <2 x i32> @fcmult2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4654,6 +5190,8 @@ define <4 x i32> @fcmult4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4670,6 +5208,8 @@ define <2 x i64> @fcmult2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4687,6 +5227,8 @@ define <2 x i32> @fcmune2xfloat_fast(<2 x float> %A, <2 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <2 x float> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4704,6 +5246,8 @@ define <4 x i32> @fcmune4xfloat_fast(<4 x float> %A, <4 x float> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <4 x float> %A, %B
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4721,6 +5265,8 @@ define <2 x i64> @fcmune2xdouble_fast(<2 x double> %A, <2 x double> %B) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <2 x double> %A, %B
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4736,6 +5282,8 @@ define <2 x i32> @fcmoeqz2xfloat_fast(<2 x float> %A) {
 ; GISEL-LABEL: fcmoeqz2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4751,6 +5299,8 @@ define <4 x i32> @fcmoeqz4xfloat_fast(<4 x float> %A) {
 ; GISEL-LABEL: fcmoeqz4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4765,6 +5315,8 @@ define <2 x i64> @fcmoeqz2xdouble_fast(<2 x double> %A) {
 ; GISEL-LABEL: fcmoeqz2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oeq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4781,6 +5333,8 @@ define <2 x i32> @fcmogez2xfloat_fast(<2 x float> %A) {
 ; GISEL-LABEL: fcmogez2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4796,6 +5350,8 @@ define <4 x i32> @fcmogez4xfloat_fast(<4 x float> %A) {
 ; GISEL-LABEL: fcmogez4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4810,6 +5366,8 @@ define <2 x i64> @fcmogez2xdouble_fast(<2 x double> %A) {
 ; GISEL-LABEL: fcmogez2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast oge <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4825,6 +5383,8 @@ define <2 x i32> @fcmogtz2xfloat_fast(<2 x float> %A) {
 ; GISEL-LABEL: fcmogtz2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4840,6 +5400,8 @@ define <4 x i32> @fcmogtz4xfloat_fast(<4 x float> %A) {
 ; GISEL-LABEL: fcmogtz4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4854,6 +5416,8 @@ define <2 x i64> @fcmogtz2xdouble_fast(<2 x double> %A) {
 ; GISEL-LABEL: fcmogtz2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ogt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4869,6 +5433,8 @@ define <2 x i32> @fcmoltz2xfloat_fast(<2 x float> %A) {
 ; GISEL-LABEL: fcmoltz2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4884,6 +5450,8 @@ define <4 x i32> @fcmoltz4xfloat_fast(<4 x float> %A) {
 ; GISEL-LABEL: fcmoltz4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4899,6 +5467,8 @@ define <2 x i64> @fcmoltz2xdouble_fast(<2 x double> %A) {
 ; GISEL-LABEL: fcmoltz2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast olt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4914,6 +5484,8 @@ define <2 x i32> @fcmolez2xfloat_fast(<2 x float> %A) {
 ; GISEL-LABEL: fcmolez2xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4929,6 +5501,8 @@ define <4 x i32> @fcmolez4xfloat_fast(<4 x float> %A) {
 ; GISEL-LABEL: fcmolez4xfloat_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4944,6 +5518,8 @@ define <2 x i64> @fcmolez2xdouble_fast(<2 x double> %A) {
 ; GISEL-LABEL: fcmolez2xdouble_fast:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ole <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -4962,6 +5538,8 @@ define <2 x i32> @fcmonez2xfloat_fast(<2 x float> %A) {
 ; GISEL-NEXT:    fcmgt v1.2s, v0.2s, #0.0
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -4980,6 +5558,8 @@ define <4 x i32> @fcmonez4xfloat_fast(<4 x float> %A) {
 ; GISEL-NEXT:    fcmgt v1.4s, v0.4s, #0.0
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -4998,6 +5578,8 @@ define <2 x i64> @fcmonez2xdouble_fast(<2 x double> %A) {
 ; GISEL-NEXT:    fcmgt v1.2d, v0.2d, #0.0
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast one <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5017,6 +5599,8 @@ define <2 x i32> @fcmordz2xfloat_fast(<2 x float> %A) {
 ; GISEL-NEXT:    fcmge v1.2s, v0.2s, #0.0
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5036,6 +5620,8 @@ define <4 x i32> @fcmordz4xfloat_fast(<4 x float> %A) {
 ; GISEL-NEXT:    fcmge v1.4s, v0.4s, #0.0
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5055,6 +5641,8 @@ define <2 x i64> @fcmordz2xdouble_fast(<2 x double> %A) {
 ; GISEL-NEXT:    fcmge v1.2d, v0.2d, #0.0
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ord <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5073,6 +5661,8 @@ define <2 x i32> @fcmueqz2xfloat_fast(<2 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5091,6 +5681,8 @@ define <4 x i32> @fcmueqz4xfloat_fast(<4 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5109,6 +5701,8 @@ define <2 x i64> @fcmueqz2xdouble_fast(<2 x double> %A) {
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ueq <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5125,6 +5719,8 @@ define <2 x i32> @fcmugez2xfloat_fast(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5141,6 +5737,8 @@ define <4 x i32> @fcmugez4xfloat_fast(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5157,6 +5755,8 @@ define <2 x i64> @fcmugez2xdouble_fast(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uge <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5173,6 +5773,8 @@ define <2 x i32> @fcmugtz2xfloat_fast(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5189,6 +5791,8 @@ define <4 x i32> @fcmugtz4xfloat_fast(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5205,6 +5809,8 @@ define <2 x i64> @fcmugtz2xdouble_fast(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ugt <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5221,6 +5827,8 @@ define <2 x i32> @fcmultz2xfloat_fast(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5237,6 +5845,8 @@ define <4 x i32> @fcmultz4xfloat_fast(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5253,6 +5863,8 @@ define <2 x i64> @fcmultz2xdouble_fast(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ult <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5270,6 +5882,8 @@ define <2 x i32> @fcmulez2xfloat_fast(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5286,6 +5900,8 @@ define <4 x i32> @fcmulez4xfloat_fast(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5302,6 +5918,8 @@ define <2 x i64> @fcmulez2xdouble_fast(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast ule <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5319,6 +5937,8 @@ define <2 x i32> @fcmunez2xfloat_fast(<2 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5336,6 +5956,8 @@ define <4 x i32> @fcmunez4xfloat_fast(<4 x float> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5353,6 +5975,8 @@ define <2 x i64> @fcmunez2xdouble_fast(<2 x double> %A) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast une <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -5374,6 +5998,8 @@ define <2 x i32> @fcmunoz2xfloat_fast(<2 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x float> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -5395,6 +6021,8 @@ define <4 x i32> @fcmunoz4xfloat_fast(<4 x float> %A) {
 ; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <4 x float> %A, zeroinitializer
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -5416,6 +6044,8 @@ define <2 x i64> @fcmunoz2xdouble_fast(<2 x double> %A) {
 ; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
 ; GISEL-NEXT:    ret
   %tmp3 = fcmp fast uno <2 x double> %A, zeroinitializer
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>


        


More information about the llvm-commits mailing list