[llvm] f7e1efe - [LLVM][ISel][SVE] Add patterns for merging reverse subtracts. (#101488)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 04:16:54 PDT 2024
Author: Paul Walker
Date: 2024-08-07T12:16:50+01:00
New Revision: f7e1efe8bcea14686d1c303e058f687b6ca1c79f
URL: https://github.com/llvm/llvm-project/commit/f7e1efe8bcea14686d1c303e058f687b6ca1c79f
DIFF: https://github.com/llvm/llvm-project/commit/f7e1efe8bcea14686d1c303e058f687b6ca1c79f.diff
LOG: [LLVM][ISel][SVE] Add patterns for merging reverse subtracts. (#101488)
vselect cond, ([f]sub b, a), a ==> [f]subr cond, a, b
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 19c03011e07b2..209830c7dc4ab 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -279,6 +279,10 @@ def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(int_aarch64_sve_fsub node:$pg, node:$op1, node:$op2),
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2), node:$op1)
]>;
+def AArch64fsubr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
+ (int_aarch64_sve_fsubr node:$pg, node:$op1, node:$op2),
+ (vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op2, node:$op1), node:$op1)
+]>;
def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
[(int_aarch64_sve_shadd node:$pg, node:$op1, node:$op2),
@@ -423,6 +427,11 @@ def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
(sub node:$op2, node:$op1)>;
+
+def AArch64subr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_subr node:$pg, node:$op1, node:$op2),
+ (vselect node:$pg, (sub node:$op2, node:$op1), node:$op1)]>;
+
def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
(vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
@@ -529,7 +538,7 @@ let Predicates = [HasSVEorSME] in {
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", AArch64add_m1, DestructiveBinaryComm>;
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
- defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
+ defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", AArch64subr_m1, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", AArch64orr_m1, DestructiveBinaryComm>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", AArch64eor_m1, DestructiveBinaryComm>;
@@ -685,7 +694,7 @@ let Predicates = [HasSVEorSME] in {
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
- defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", AArch64fsubr_m1, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>;
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>;
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>;
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
index 2541910e080e3..adbdee0eb0847 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
@@ -1505,9 +1505,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1520,9 +1520,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1535,9 +1535,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1550,9 +1550,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -2517,10 +2517,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2533,10 +2533,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2549,10 +2549,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
index bafd5abcc7b23..6607f9c3b368e 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
@@ -932,9 +932,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -947,9 +947,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -962,9 +962,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -977,9 +977,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1588,10 +1588,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1604,10 +1604,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1620,10 +1620,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
More information about the llvm-commits
mailing list