[llvm] c3c8f00 - [AArch64] Add vselect(fmin/fmax) SVE patterns
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 24 06:55:45 PDT 2023
Author: David Green
Date: 2023-07-24T14:55:38+01:00
New Revision: c3c8f0025a996db52f636492569f1c34ba2758d1
URL: https://github.com/llvm/llvm-project/commit/c3c8f0025a996db52f636492569f1c34ba2758d1
DIFF: https://github.com/llvm/llvm-project/commit/c3c8f0025a996db52f636492569f1c34ba2758d1.diff
LOG: [AArch64] Add vselect(fmin/fmax) SVE patterns
For both minnum/maxnum and minimum/maximum, this adds tablegen patterns for
vselect(fmin/fmax), creating predicated fminnm/fmaxnm/fmin/fmax nodes.
Differential Revision: https://reviews.llvm.org/D155872
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0a2dbc817184f0..9faf067e8ccecc 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -441,6 +441,10 @@ def AArch64smax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smax, AAr
def AArch64umax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>;
def AArch64smin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smin, AArch64smin_p>;
def AArch64umin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umin, AArch64umin_p>;
+def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm, AArch64fminnm_p>;
+def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
+def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
+def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -622,10 +626,10 @@ let Predicates = [HasSVEorSME] in {
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", AArch64fmin_m1, DestructiveBinaryComm>;
defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
index d66e588b944487..aa6681b6e0ff9c 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
@@ -1183,9 +1183,8 @@ define <vscale x 4 x float> @minnum_nxv4f32_x(<vscale x 4 x float> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1199,9 +1198,8 @@ define <vscale x 8 x half> @minnum_nxv8f16_x(<vscale x 8 x half> %x, <vscale x 8
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1215,9 +1213,8 @@ define <vscale x 2 x double> @minnum_nxv2f64_x(<vscale x 2 x double> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -1231,9 +1228,8 @@ define <vscale x 4 x float> @maxnum_nxv4f32_x(<vscale x 4 x float> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1247,9 +1243,8 @@ define <vscale x 8 x half> @maxnum_nxv8f16_x(<vscale x 8 x half> %x, <vscale x 8
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1263,9 +1258,8 @@ define <vscale x 2 x double> @maxnum_nxv2f64_x(<vscale x 2 x double> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -1279,9 +1273,8 @@ define <vscale x 4 x float> @minimum_nxv4f32_x(<vscale x 4 x float> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1295,9 +1288,8 @@ define <vscale x 8 x half> @minimum_nxv8f16_x(<vscale x 8 x half> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1311,9 +1303,8 @@ define <vscale x 2 x double> @minimum_nxv2f64_x(<vscale x 2 x double> %x, <vscal
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -1327,9 +1318,8 @@ define <vscale x 4 x float> @maximum_nxv4f32_x(<vscale x 4 x float> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1343,9 +1333,8 @@ define <vscale x 8 x half> @maximum_nxv8f16_x(<vscale x 8 x half> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1359,9 +1348,8 @@ define <vscale x 2 x double> @maximum_nxv2f64_x(<vscale x 2 x double> %x, <vscal
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -2688,9 +2676,9 @@ define <vscale x 4 x float> @minnum_nxv4f32_y(<vscale x 4 x float> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2704,9 +2692,9 @@ define <vscale x 8 x half> @minnum_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2720,9 +2708,9 @@ define <vscale x 2 x double> @minnum_nxv2f64_y(<vscale x 2 x double> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -2736,9 +2724,9 @@ define <vscale x 4 x float> @maxnum_nxv4f32_y(<vscale x 4 x float> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2752,9 +2740,9 @@ define <vscale x 8 x half> @maxnum_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2768,9 +2756,9 @@ define <vscale x 2 x double> @maxnum_nxv2f64_y(<vscale x 2 x double> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -2784,9 +2772,9 @@ define <vscale x 4 x float> @minimum_nxv4f32_y(<vscale x 4 x float> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2800,9 +2788,9 @@ define <vscale x 8 x half> @minimum_nxv8f16_y(<vscale x 8 x half> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2816,9 +2804,9 @@ define <vscale x 2 x double> @minimum_nxv2f64_y(<vscale x 2 x double> %x, <vscal
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
@@ -2832,9 +2820,9 @@ define <vscale x 4 x float> @maximum_nxv4f32_y(<vscale x 4 x float> %x, <vscale
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2848,9 +2836,9 @@ define <vscale x 8 x half> @maximum_nxv8f16_y(<vscale x 8 x half> %x, <vscale x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
-; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2864,9 +2852,9 @@ define <vscale x 2 x double> @maximum_nxv2f64_y(<vscale x 2 x double> %x, <vscal
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
More information about the llvm-commits
mailing list