[llvm] [GlobalISel] Combine (X == 0) & (Y == 0) -> (X | Y) == 0 (PR #71949)
Dávid Ferenc Szabó via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 09:26:42 PST 2024
dfszabo wrote:
> > #82733 was merged very recently; it inserts a G_FREEZE before the G_OR (https://godbolt.org/z/dcr83rfM6), so the above patterns no longer match in cases like those in `cmp-chains.ll`, and those optimization opportunities are lost. I am not sure whether G_FREEZE should be incorporated into the pattern or not.
>
> You can try the equivalent transform in alive2 on the IR to see if it's valid
Nice tool. It seems these are valid: https://alive2.llvm.org/ce/z/3AHtx3, https://alive2.llvm.org/ce/z/W8uR9j. With the changes below, the lost optimizations come back, and some cases even end up better than they were before the regression. But I think this might need a separate PR.
```diff
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 18db7a819540..66d3c14547c2 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -952,6 +952,21 @@ def redundant_binop_in_equality : GICombineRule<
[{ return Helper.matchRedundantBinOpInEquality(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def remove_freeze_of_constants: GICombineRule <
+ (defs root:$dst),
+ (match (G_FREEZE $dst, $src),
+ [{ return !!isConstantOrConstantSplatVector(*MRI.getVRegDef(${src}.getReg()), MRI); }]),
+ (apply (GIReplaceReg $dst, $src))>;
+
+def canonicalize_icmp_freeze: GICombineRule<
+ (defs root:$root),
+ (match (G_ICMP $dst, $p, $src1, $src2),
+ (G_FREEZE $root, $dst)),
+ (apply (G_FREEZE $new_src1, $src1),
+ (G_FREEZE $new_src2, $src2),
+ (G_ICMP $root, $p, $new_src1, $new_src2))
+>;
+
// Transform: (X == 0 & Y == 0) -> (X | Y) == 0
def double_icmp_zero_and_combine: GICombineRule<
(defs root:$root),
@@ -980,6 +995,8 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
+def freeze_combines : GICombineGroup<[remove_freeze_of_constants, canonicalize_icmp_freeze]>;
+
def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
double_icmp_zero_or_combine]>;
@@ -1374,7 +1391,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine]>;
+ combine_concat_vector, double_icmp_zero_and_or_combine, freeze_combines]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 1d9f39e51859..4eaa25608d16 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -109,8 +109,7 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
; GISEL-NEXT: cset w8, lo
; GISEL-NEXT: cmp w2, w3
; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ne i32 %2, %3
@@ -138,8 +137,7 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; GISEL-NEXT: cmp w4, w5
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
@@ -173,8 +171,7 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
+; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%9 = icmp ult i32 %0, %1
%10 = icmp ugt i32 %2, %3
@@ -189,22 +186,12 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; (x0 != 0) || (x1 != 0)
define i32 @true_or2(i32 %0, i32 %1) {
-; SDISEL-LABEL: true_or2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: true_or2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-LABEL: true_or2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
%3 = icmp ne i32 %0, 0
%4 = icmp ne i32 %1, 0
%5 = select i1 %3, i1 true, i1 %4
@@ -214,26 +201,13 @@ define i32 @true_or2(i32 %0, i32 %1) {
; (x0 != 0) || (x1 != 0) || (x2 != 0)
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
-; SDISEL-LABEL: true_or3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: orr w8, w8, w2
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: true_or3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: cmp w2, #0
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-LABEL: true_or3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: orr w8, w8, w2
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
%4 = icmp ne i32 %0, 0
%5 = icmp ne i32 %1, 0
%6 = select i1 %4, i1 true, i1 %5
@@ -242,5 +216,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
%9 = zext i1 %8 to i32
ret i32 %9
}
```
https://github.com/llvm/llvm-project/pull/71949
More information about the llvm-commits
mailing list