[llvm] 3870857 - [SVE] Restrict cmp+and->pred_cmp isel to instances where the and is the sole user of the compare.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 13 05:44:12 PDT 2023
Author: Paul Walker
Date: 2023-03-13T12:42:00Z
New Revision: 3870857226b68fe19e97969e256e31c5eb681c04
URL: https://github.com/llvm/llvm-project/commit/3870857226b68fe19e97969e256e31c5eb681c04
DIFF: https://github.com/llvm/llvm-project/commit/3870857226b68fe19e97969e256e31c5eb681c04.diff
LOG: [SVE] Restrict cmp+and->pred_cmp isel to instances where the and is the sole user of the compare.
Without the single-use restriction, we may replace the `and` with a
more costly duplicated compare.
Differential Revision: https://reviews.llvm.org/D145755
Added:
Modified:
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-fcmp.ll
llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 71995fc26fb4..4c97ae88f192 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -47,6 +47,10 @@ def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
]>;
def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>;
+def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc),
+ (AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
@@ -5028,9 +5032,9 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op2, $Op3)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
(cmp $Op1, $Op3, $Op2)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
(cmp $Pg, $Op2, $Op3)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
(cmp $Pg, $Op3, $Op2)>;
}
@@ -5040,9 +5044,9 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
(cmp $Op1, $Op2)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
(cmp $Pg, $Op1)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
(cmp $Pg, $Op1)>;
}
@@ -5126,13 +5130,13 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
commuted_cc)),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
- (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt ZPR:$Zs1),
(intvt (splat_vector (immtype:$imm))),
cc))),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
- (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt (splat_vector (immtype:$imm))),
(intvt ZPR:$Zs1),
commuted_cc))),
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
index 5cb44b1ee2e7..35cbe65c6a8b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -493,8 +493,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
@@ -507,8 +507,8 @@ define %svboolx2 @and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_ogt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
@@ -521,8 +521,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x
; CHECK-LABEL: and_of_multiuse_fcmp_olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, %y
%and = and <vscale x 4 x i1> %pg, %cmp
@@ -535,8 +535,8 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
; CHECK-LABEL: and_of_multiuse_fcmp_olt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
%and = and <vscale x 4 x i1> %pg, %cmp
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
index 2e6f704975bd..2d4d0b74a08e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -23,12 +23,11 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: sxtw z5.d, p0/m, z6.d
; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT: ld1w { z5.d }, p1/z, [x1]
+; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
-; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
-; CHECK-NEXT: not p2.b, p0/z, p2.b
-; CHECK-NEXT: cmpne p2.d, p2/z, z4.d, #0
+; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d
; CHECK-NEXT: uaddv d0, p0, z2.d
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
index ee5969e85437..a995823fe446 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
@@ -1223,8 +1223,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_sle:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: cmpge p1.s, p1/z, z1.s, z0.s
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp sle <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
@@ -1237,8 +1237,8 @@ define %svboolx2 @and_of_multiuse_icmp_sle_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_sle_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmple p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmple p1.s, p1/z, z0.s, #1
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp sle <vscale x 4 x i32> %b, %imm
@@ -1252,8 +1252,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt(<vscale x 4 x i1> %a, <vscale x 4 x i
; CHECK-LABEL: and_of_multiuse_icmp_ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 4 x i32> %b, %c
%and = and <vscale x 4 x i1> %a, %cmp
@@ -1266,8 +1266,8 @@ define %svboolx2 @and_of_multiuse_icmp_ugt_imm(<vscale x 4 x i1> %a, <vscale x 4
; CHECK-LABEL: and_of_multiuse_icmp_ugt_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, #1
; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, #1
+; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%imm = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i64 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 4 x i32> %b, %imm
More information about the llvm-commits mailing list