[llvm] 44271e7 - [AArch64][SVE] Fix lowering of "fcmp ueq/one" when using SVE
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 13 02:24:12 PDT 2022
Author: David Sherwood
Date: 2022-04-13T10:24:03+01:00
New Revision: 44271e7c5582a01ec389231ee98687b0b4557df4
URL: https://github.com/llvm/llvm-project/commit/44271e7c5582a01ec389231ee98687b0b4557df4
DIFF: https://github.com/llvm/llvm-project/commit/44271e7c5582a01ec389231ee98687b0b4557df4.diff
LOG: [AArch64][SVE] Fix lowering of "fcmp ueq/one" when using SVE
We were previously lowering the setcc DAG node to the incorrect
instructions when using the SETUEQ and SETONE floating-point
condition codes. I have fixed this by marking the SETONE code
as Expand and leaving the SETUNE code legal. I have also
fixed up the patterns for FCMNE_PPzZZ and FCMNE_PPzZ0 to use
the correct condition codes.
Differential Revision: https://reviews.llvm.org/D121905
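For context, a minimal example of the IR this affects, taken from the
updated sve-fcmp.ll test below:

  define <vscale x 4 x i1> @one(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
    %y = fcmp one <vscale x 4 x float> %x, %x2
    ret <vscale x 4 x i1> %y
  }

"fcmp one" is only true when both operands are ordered and unequal,
whereas the SVE fcmne instruction is also true for unordered inputs,
i.e. it implements "fcmp une". With SETONE marked as Expand, the
"one" and "ueq" compares are now expanded into two compares combined
with a predicate select (see the updated CHECK lines in the tests),
while "fcmp une" maps directly onto a single fcmne.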
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-fcmp.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
llvm/test/CodeGen/AArch64/sve-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 65aff576ed71b..b74583b6d9fb8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1301,7 +1301,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
+ setCondCodeAction(ISD::SETONE, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
@@ -1556,7 +1556,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
+ setCondCodeAction(ISD::SETONE, VT, Expand);
}
// Mark integer truncating stores/extending loads as having custom lowering
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 823d8c30625d5..c6bc8f4e6c85b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1493,7 +1493,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -1503,7 +1503,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>;
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>;
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
- defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>;
+ defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index 3c752ab5fa25d..6e966d1dccc29 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -50,7 +50,9 @@ define <vscale x 4 x i1> @one(<vscale x 4 x float> %x, <vscale x 4 x float> %x2)
; CHECK-LABEL: one:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp one <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
@@ -69,8 +71,9 @@ define <vscale x 4 x i1> @ueq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2)
; CHECK-LABEL: ueq:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
@@ -119,8 +122,7 @@ define <vscale x 4 x i1> @une(<vscale x 4 x float> %x, <vscale x 4 x float> %x2)
; CHECK-LABEL: une:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp une <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
@@ -147,8 +149,9 @@ define <vscale x 2 x i1> @ueq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float>
; CHECK-LABEL: ueq_2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x float> %x, %x2
ret <vscale x 2 x i1> %y
@@ -166,8 +169,9 @@ define <vscale x 2 x i1> @ueq_2f64(<vscale x 2 x double> %x, <vscale x 2 x doubl
; CHECK-LABEL: ueq_2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmne p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x double> %x, %x2
ret <vscale x 2 x i1> %y
@@ -185,8 +189,9 @@ define <vscale x 2 x i1> @ueq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %
; CHECK-LABEL: ueq_2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x half> %x, %x2
ret <vscale x 2 x i1> %y
@@ -204,8 +209,9 @@ define <vscale x 4 x i1> @ueq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %
; CHECK-LABEL: ueq_4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x half> %x, %x2
ret <vscale x 4 x i1> %y
@@ -223,8 +229,9 @@ define <vscale x 8 x i1> @ueq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %
; CHECK-LABEL: ueq_8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 8 x half> %x, %x2
ret <vscale x 8 x i1> %y
@@ -357,7 +364,9 @@ define <vscale x 4 x i1> @one_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: one_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmlt p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp one <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
@@ -365,9 +374,11 @@ define <vscale x 4 x i1> @one_zero(<vscale x 4 x float> %x) {
define <vscale x 4 x i1> @ueq_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ueq_zero:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
@@ -416,8 +427,7 @@ define <vscale x 4 x i1> @une_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: une_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp une <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
index 2ab4535a839a1..2f528666de19b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
@@ -367,10 +367,10 @@ define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmeq p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
@@ -391,7 +391,9 @@ define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
@@ -413,10 +415,8 @@ define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, <16 x half>* %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
index 9f3121b767eb9..27a465fbce56c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
@@ -8,19 +8,16 @@ define i1 @ptest_v16i1_256bit_min_sve(float* %a, float * %b) vscale_range(2, 0)
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: eor z1.d, z2.d, z1.d
+; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: orv b0, p0, z1.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -36,12 +33,10 @@ define i1 @ptest_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0)
; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: orv b0, p0, z0.b
@@ -59,12 +54,10 @@ define i1 @ptest_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: orv b0, p0, z0.b
@@ -84,15 +77,11 @@ define i1 @ptest_or_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4,
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: eor z1.d, z2.d, z1.d
+; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: mov p0.b, p1/m, p1.b
+; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: orv b0, p0, z0.b
@@ -122,13 +111,10 @@ define i1 @ptest_and_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4)
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
@@ -153,13 +139,10 @@ define i1 @ptest_and_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4,
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z1.d
-; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index 4e4c659ae676f..857e057eb088f 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -547,8 +547,7 @@ define <vscale x 4 x float> @select_f32_invert_fmul(<vscale x 4 x float> %a, <vs
; CHECK-LABEL: select_f32_invert_fmul:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
@@ -561,8 +560,7 @@ define <vscale x 4 x float> @select_f32_invert_fadd(<vscale x 4 x float> %a, <vs
; CHECK-LABEL: select_f32_invert_fadd:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer