[llvm] a8de8ca - [AArch64] Add fcmp fast math tests

Cullen Rhodes via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 22 00:54:42 PDT 2022


Author: Cullen Rhodes
Date: 2022-07-22T07:53:55Z
New Revision: a8de8cab7006bc885804e8a2c0a6902702521cfe

URL: https://github.com/llvm/llvm-project/commit/a8de8cab7006bc885804e8a2c0a6902702521cfe
DIFF: https://github.com/llvm/llvm-project/commit/a8de8cab7006bc885804e8a2c0a6902702521cfe.diff

LOG: [AArch64] Add fcmp fast math tests

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D130094

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/neon-compare-instructions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index bd665955eb99d..dcb0ca631c5bd 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -4494,3 +4494,1890 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) {
   ret <2 x i64> %tmp4
 
 }
+
+define <2 x i32> @fcmoeq2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmoeq2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeq2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoeq4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmoeq4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeq4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoeq2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmoeq2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeq2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoge2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmoge2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoge2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoge4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmoge4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoge4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoge2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmoge2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoge2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmogt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmogt2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogt2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmogt4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogt4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmogt2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogt2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmole2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmole2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmole2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmole4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmole4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    mov s4, v0.s[2]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    mov s3, v1.s[2]
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    mov v2.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v2.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v2.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmole4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmole2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmole2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d2, v1.d[1]
+; CHECK-NEXT:    mov d3, v0.d[1]
+; CHECK-NEXT:    fcmp d3, d2
+; CHECK-NEXT:    csetm x8, le
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    csetm x9, le
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmole2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmolt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmolt2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolt2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmolt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmolt4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    mov s4, v0.s[2]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    mov s3, v1.s[2]
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    mov v2.s[1], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v2.s[2], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    mov v2.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolt4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmolt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmolt2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d2, v1.d[1]
+; CHECK-NEXT:    mov d3, v0.d[1]
+; CHECK-NEXT:    fcmp d3, d2
+; CHECK-NEXT:    csetm x8, lt
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    csetm x9, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolt2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmone2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmone2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmone2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmone4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmone4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmone4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmone2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmone2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmone2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmord2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmord2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmord2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmord4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmord4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmord4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmord2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmord2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmuno2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    orr v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuno2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.2s, v0.2s, v1.2s
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmuno4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuno4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.4s, v0.4s, v1.4s
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmuno2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuno2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v2.2d, v0.2d, v1.2d
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmueq2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmueq2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueq2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.2s, v0.2s, v1.2s
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    orr v0.8b, v0.8b, v2.8b
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmueq4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmueq4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueq4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmueq2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmueq2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueq2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v2.2d, v0.2d, v1.2d
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    orr v0.16b, v0.16b, v2.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmuge2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmuge2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuge2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmuge4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmuge4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuge4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmuge2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmuge2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmuge2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugt2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmugt2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugt2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v1.2s, v0.2s
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmugt4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmugt4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugt4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v1.4s, v0.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmugt2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmugt2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugt2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v1.2d, v0.2d
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmule2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmule2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmule2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmule4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmule4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    mov s4, v0.s[2]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    mov s3, v1.s[2]
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    mov v2.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v2.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v2.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmule4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmule2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmule2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d2, v1.d[1]
+; CHECK-NEXT:    mov d3, v0.d[1]
+; CHECK-NEXT:    fcmp d3, d2
+; CHECK-NEXT:    csetm x8, le
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    csetm x9, le
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmule2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmult2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmult2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmult2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmult4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmult4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    mov s4, v0.s[2]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    mov s3, v1.s[2]
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    mov v2.s[1], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v2.s[2], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    mov v2.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmult4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmult2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmult2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d2, v1.d[1]
+; CHECK-NEXT:    mov d3, v0.d[1]
+; CHECK-NEXT:    fcmp d3, d2
+; CHECK-NEXT:    csetm x8, lt
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    csetm x9, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmult2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmune2xfloat_fast(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmune2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmune2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmune4xfloat_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmune4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmune4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmune2xdouble_fast(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmune2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmune2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoeqz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmoeqz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeqz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <2 x float> %A, zeroinitializer
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoeqz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmoeqz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeqz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <4 x float> %A, zeroinitializer
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoeqz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmoeqz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoeqz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oeq <2 x double> %A, zeroinitializer ; lane-wise ordered-equal against 0.0 with all fast-math flags; both prefixes expect a single fcmeq against #0.0
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmogez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmogez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <2 x float> %A, zeroinitializer ; lane-wise ordered greater-or-equal against 0.0 with all fast-math flags; both prefixes expect a single fcmge against #0.0
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmogez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <4 x float> %A, zeroinitializer ; lane-wise ordered greater-or-equal against 0.0 with all fast-math flags; both prefixes expect a single fcmge against #0.0
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmogez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast oge <2 x double> %A, zeroinitializer ; lane-wise ordered greater-or-equal against 0.0 with all fast-math flags; both prefixes expect a single fcmge against #0.0
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmogtz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmogtz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogtz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <2 x float> %A, zeroinitializer ; lane-wise ordered greater-than against 0.0 with all fast-math flags; both prefixes expect a single fcmgt against #0.0
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogtz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmogtz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogtz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <4 x float> %A, zeroinitializer ; lane-wise ordered greater-than against 0.0 with all fast-math flags; both prefixes expect a single fcmgt against #0.0
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogtz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmogtz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmogtz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ogt <2 x double> %A, zeroinitializer ; lane-wise ordered greater-than against 0.0 with all fast-math flags; both prefixes expect a single fcmgt against #0.0
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoltz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmoltz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoltz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <2 x float> %A, zeroinitializer ; lane-wise ordered less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects vector fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoltz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmoltz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    mov s2, v0.s[2]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fcmp s2, #0.0
+; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    mov v1.s[1], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov v1.s[2], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    mov v1.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoltz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <4 x float> %A, zeroinitializer ; lane-wise ordered less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects vector fcmlt
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmoltz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmoltz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcmp d1, #0.0
+; CHECK-NEXT:    csetm x8, lt
+; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    csetm x9, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmoltz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast olt <2 x double> %A, zeroinitializer ; lane-wise ordered less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects vector fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmolez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmolez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <2 x float> %A, zeroinitializer ; lane-wise ordered less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects vector fcmle
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmolez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmolez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    mov s2, v0.s[2]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s2, #0.0
+; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    mov v1.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov v1.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v1.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <4 x float> %A, zeroinitializer ; lane-wise ordered less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects vector fcmle
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmolez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmolez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcmp d1, #0.0
+; CHECK-NEXT:    csetm x8, le
+; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    csetm x9, le
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmolez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ole <2 x double> %A, zeroinitializer ; lane-wise ordered less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects vector fcmle
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmonez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmonez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmonez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.2s, v0.2s, #0.0
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <2 x float> %A, zeroinitializer ; lane-wise ordered not-equal against 0.0 with all fast-math flags; default prefix expects fcmeq + mvn, GISEL prefix expects fcmgt | fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmonez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmonez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmonez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.4s, v0.4s, #0.0
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <4 x float> %A, zeroinitializer ; lane-wise ordered not-equal against 0.0 with all fast-math flags; default prefix expects fcmeq + mvn, GISEL prefix expects fcmgt | fcmlt
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmonez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmonez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmonez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.2d, v0.2d, #0.0
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast one <2 x double> %A, zeroinitializer ; lane-wise ordered not-equal against 0.0 with all fast-math flags; default prefix expects fcmeq + mvn, GISEL prefix expects fcmgt | fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmordz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmordz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.2s, v0.2s, #0.0
+; CHECK-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmordz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.2s, v0.2s, #0.0
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <2 x float> %A, zeroinitializer ; lane-wise ordered (no NaN operand) test against 0.0 with all fast-math flags; both prefixes expect fcmge | fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmordz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmordz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.4s, v0.4s, #0.0
+; CHECK-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmordz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.4s, v0.4s, #0.0
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <4 x float> %A, zeroinitializer ; lane-wise ordered (no NaN operand) test against 0.0 with all fast-math flags; both prefixes expect fcmge | fcmlt
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmordz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmordz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.2d, v0.2d, #0.0
+; CHECK-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmordz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.2d, v0.2d, #0.0
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ord <2 x double> %A, zeroinitializer ; lane-wise ordered (no NaN operand) test against 0.0 with all fast-math flags; both prefixes expect fcmge | fcmlt
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmueqz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmueqz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueqz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.2s, v0.2s, #0.0
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <2 x float> %A, zeroinitializer ; lane-wise unordered-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmeq, GISEL prefix expects not(fcmgt | fcmlt)
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmueqz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmueqz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueqz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.4s, v0.4s, #0.0
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <4 x float> %A, zeroinitializer ; lane-wise unordered-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmeq, GISEL prefix expects not(fcmgt | fcmlt)
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmueqz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmueqz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmueqz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v1.2d, v0.2d, #0.0
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ueq <2 x double> %A, zeroinitializer ; lane-wise unordered-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmeq, GISEL prefix expects not(fcmgt | fcmlt)
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmugez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <2 x float> %A, zeroinitializer ; lane-wise unordered-or-greater-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmge, GISEL prefix expects fcmlt + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmugez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmugez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <4 x float> %A, zeroinitializer ; lane-wise unordered-or-greater-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmge, GISEL prefix expects fcmlt + mvn
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmugez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmugez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uge <2 x double> %A, zeroinitializer ; lane-wise unordered-or-greater-or-equal against 0.0 with all fast-math flags; default prefix expects a single fcmge, GISEL prefix expects fcmlt + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugtz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmugtz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugtz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <2 x float> %A, zeroinitializer ; lane-wise unordered-or-greater-than against 0.0 with all fast-math flags; default prefix expects a single fcmgt, GISEL prefix expects fcmle + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmugtz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmugtz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugtz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <4 x float> %A, zeroinitializer ; lane-wise unordered-or-greater-than against 0.0 with all fast-math flags; default prefix expects a single fcmgt, GISEL prefix expects fcmle + mvn
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmugtz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmugtz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmugtz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmle v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ugt <2 x double> %A, zeroinitializer ; lane-wise unordered-or-greater-than against 0.0 with all fast-math flags; default prefix expects a single fcmgt, GISEL prefix expects fcmle + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmultz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmultz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmultz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <2 x float> %A, zeroinitializer ; lane-wise unordered-or-less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects fcmge + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmultz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmultz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    mov s2, v0.s[2]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, lt
+; CHECK-NEXT:    fcmp s2, #0.0
+; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    mov v1.s[1], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov v1.s[2], w8
+; CHECK-NEXT:    csetm w8, lt
+; CHECK-NEXT:    mov v1.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmultz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <4 x float> %A, zeroinitializer ; lane-wise unordered-or-less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects fcmge + mvn
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmultz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmultz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcmp d1, #0.0
+; CHECK-NEXT:    csetm x8, lt
+; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    csetm x9, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmultz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ult <2 x double> %A, zeroinitializer ; lane-wise unordered-or-less-than against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm lt, GISEL prefix expects fcmge + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+; ULE with zero = !OGT
+define <2 x i32> @fcmulez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmulez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmulez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <2 x float> %A, zeroinitializer ; lane-wise unordered-or-less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects fcmgt + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmulez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmulez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s1, v0.s[1]
+; CHECK-NEXT:    mov s2, v0.s[2]
+; CHECK-NEXT:    fcmp s1, #0.0
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s2, #0.0
+; CHECK-NEXT:    fmov s1, w9
+; CHECK-NEXT:    mov v1.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    mov v1.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v1.s[3], w8
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmulez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <4 x float> %A, zeroinitializer ; lane-wise unordered-or-less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects fcmgt + mvn
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmulez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmulez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcmp d1, #0.0
+; CHECK-NEXT:    csetm x8, le
+; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    csetm x9, le
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmulez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <2 x double> %A, zeroinitializer ; lane-wise unordered-or-less-or-equal against 0.0 with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects fcmgt + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmunez2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmunez2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunez2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <2 x float> %A, zeroinitializer ; lane-wise unordered-or-not-equal against 0.0 with all fast-math flags; both prefixes expect fcmeq + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmunez4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmunez4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunez4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <4 x float> %A, zeroinitializer ; lane-wise unordered-or-not-equal against 0.0 with all fast-math flags; both prefixes expect fcmeq + mvn
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmunez2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmunez2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunez2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmeq v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast une <2 x double> %A, zeroinitializer ; lane-wise unordered-or-not-equal against 0.0 with all fast-math flags; both prefixes expect fcmeq + mvn
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmunoz2xfloat_fast(<2 x float> %A) {
+; CHECK-LABEL: fcmunoz2xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.2s, v0.2s, #0.0
+; CHECK-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunoz2xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.2s, v0.2s, #0.0
+; GISEL-NEXT:    fcmlt v0.2s, v0.2s, #0.0
+; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
+; GISEL-NEXT:    mvn v0.8b, v0.8b
+; GISEL-NEXT:    shl v0.2s, v0.2s, #31
+; GISEL-NEXT:    sshr v0.2s, v0.2s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <2 x float> %A, zeroinitializer ; lane-wise unordered (some operand is NaN) test against 0.0 with all fast-math flags; both prefixes expect not(fcmge | fcmlt)
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmunoz4xfloat_fast(<4 x float> %A) {
+; CHECK-LABEL: fcmunoz4xfloat_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.4s, v0.4s, #0.0
+; CHECK-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunoz4xfloat_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.4s, v0.4s, #0.0
+; GISEL-NEXT:    fcmlt v0.4s, v0.4s, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.4s, v0.4s, #31
+; GISEL-NEXT:    sshr v0.4s, v0.4s, #31
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <4 x float> %A, zeroinitializer ; lane-wise unordered (some operand is NaN) test against 0.0 with all fast-math flags; both prefixes expect not(fcmge | fcmlt)
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmunoz2xdouble_fast(<2 x double> %A) {
+; CHECK-LABEL: fcmunoz2xdouble_fast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge v1.2d, v0.2d, #0.0
+; CHECK-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmunoz2xdouble_fast:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmge v1.2d, v0.2d, #0.0
+; GISEL-NEXT:    fcmlt v0.2d, v0.2d, #0.0
+; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    shl v0.2d, v0.2d, #63
+; GISEL-NEXT:    sshr v0.2d, v0.2d, #63
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast uno <2 x double> %A, zeroinitializer ; lane-wise unordered (some operand is NaN) test against 0.0 with all fast-math flags; both prefixes expect not(fcmge | fcmlt)
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ; sign-extend each i1 lane: true -> all-ones, false -> 0
+  ret <2 x i64> %tmp4
+
+}
+
+; Test SETCC fast-math flags are propagated when combining zext(setcc).
+define <4 x i32> @fcmule4xfloat_fast_zext(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmule4xfloat_fast_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s3, v1.s[1]
+; CHECK-NEXT:    mov s4, v0.s[1]
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    mov s3, v1.s[2]
+; CHECK-NEXT:    mov s4, v0.s[2]
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s4, s3
+; CHECK-NEXT:    fmov s3, w9
+; CHECK-NEXT:    mov v3.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v3.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v3.s[3], w8
+; CHECK-NEXT:    and v0.16b, v3.16b, v2.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmule4xfloat_fast_zext:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI322_0
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI322_0]
+; GISEL-NEXT:    bic v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <4 x float> %A, %B ; lane-wise unordered-or-less-or-equal of %A and %B with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects vector fcmgt
+  %tmp4 = zext <4 x i1> %tmp3 to <4 x i32> ; zero-extend each i1 lane to 0 or 1; expectations end in an and with a movi #1 splat (default) or a bic against a constant-pool load (GISEL)
+  ret <4 x i32> %tmp4
+}
+
+; Test SETCC fast-math flags are propagated when combining aext(setcc).
+define <4 x i1> @fcmule4xfloat_fast_aext(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmule4xfloat_fast_aext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s2, v1.s[1]
+; CHECK-NEXT:    mov s3, v0.s[1]
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    mov s2, v1.s[2]
+; CHECK-NEXT:    mov s3, v0.s[2]
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov s1, v1.s[3]
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    csetm w9, le
+; CHECK-NEXT:    fcmp s3, s2
+; CHECK-NEXT:    fmov s4, w9
+; CHECK-NEXT:    mov v4.s[1], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    mov v4.s[2], w8
+; CHECK-NEXT:    csetm w8, le
+; CHECK-NEXT:    mov v4.s[3], w8
+; CHECK-NEXT:    xtn v0.4h, v4.4s
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: fcmule4xfloat_fast_aext:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT:    mvn v0.16b, v0.16b
+; GISEL-NEXT:    xtn v0.4h, v0.4s
+; GISEL-NEXT:    ret
+  %tmp3 = fcmp fast ule <4 x float> %A, %B ; lane-wise unordered-or-less-or-equal of %A and %B with all fast-math flags; default prefix expects per-lane scalar fcmp + csetm le, GISEL prefix expects fcmgt + mvn
+  ret <4 x i1> %tmp3 ; the <4 x i1> compare result is returned without an explicit extend; both prefixes expect a final xtn to .4h
+}


        


More information about the llvm-commits mailing list