[llvm] [GlobalISel] Combine (X == 0) & (Y == 0) -> (X | Y) == 0 (PR #71949)

Wed Feb 28 09:26:42 PST 2024

dfszabo wrote:

> > #82733 was merged very recently; it inserts a G_FREEZE before the G_OR (https://godbolt.org/z/dcr83rfM6), so the above patterns no longer match in cases like those in `cmp-chains.ll`, and those optimization opportunities are lost. I am not sure whether G_FREEZE should be incorporated into the pattern or not.
> 
> You can try the equivalent transform in alive2 on the IR to see if it's valid

Nice tool. It seems these are valid: https://alive2.llvm.org/ce/z/3AHtx3, https://alive2.llvm.org/ce/z/W8uR9j. With the changes below, the gains come back, and the output even improves on the regression caused by #82733. But I think this might need a separate PR.

```diff

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 18db7a819540..66d3c14547c2 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -952,6 +952,21 @@ def redundant_binop_in_equality : GICombineRule<
          [{ return Helper.matchRedundantBinOpInEquality(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def remove_freeze_of_constants: GICombineRule <
+  (defs root:$dst),
+  (match (G_FREEZE $dst, $src),
+         [{ return !!isConstantOrConstantSplatVector(*MRI.getVRegDef(${src}.getReg()), MRI); }]),
+  (apply (GIReplaceReg $dst, $src))>;
+
+def canonicalize_icmp_freeze: GICombineRule<
+  (defs root:$root),
+  (match (G_ICMP $dst, $p, $src1, $src2),
+         (G_FREEZE $root, $dst)),
+  (apply (G_FREEZE $new_src1, $src1),
+         (G_FREEZE $new_src2, $src2),
+         (G_ICMP $root, $p, $new_src1, $new_src2))
+>;
+
 // Transform: (X == 0 & Y == 0) -> (X | Y) == 0
 def double_icmp_zero_and_combine: GICombineRule<
   (defs root:$root),
@@ -980,6 +995,8 @@ def double_icmp_zero_or_combine: GICombineRule<
          (G_ICMP $root, $p, $ordst, 0))
 >;
 
+def freeze_combines : GICombineGroup<[remove_freeze_of_constants, canonicalize_icmp_freeze]>;
+
 def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
                                                       double_icmp_zero_or_combine]>;
 
@@ -1374,7 +1391,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, 
-    combine_concat_vector, double_icmp_zero_and_or_combine]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, freeze_combines]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 1d9f39e51859..4eaa25608d16 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -109,8 +109,7 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
 ; GISEL-NEXT:    cset w8, lo
 ; GISEL-NEXT:    cmp w2, w3
 ; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %5 = icmp ult i32 %0, %1
   %6 = icmp ne i32 %2, %3
@@ -138,8 +137,7 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
 ; GISEL-NEXT:    cmp w4, w5
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %7 = icmp ult i32 %0, %1
   %8 = icmp ugt i32 %2, %3
@@ -173,8 +171,7 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    cset w11, eq
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %9 = icmp ult i32 %0, %1
   %10 = icmp ugt i32 %2, %3
@@ -189,22 +186,12 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
 
 ; (x0 != 0) || (x1 != 0)
 define i32 @true_or2(i32 %0, i32 %1) {
-; SDISEL-LABEL: true_or2:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    orr w8, w0, w1
-; SDISEL-NEXT:    cmp w8, #0
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: true_or2:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: true_or2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %3 = icmp ne i32 %0, 0
   %4 = icmp ne i32 %1, 0
   %5 = select i1 %3, i1 true, i1 %4
@@ -214,26 +201,13 @@ define i32 @true_or2(i32 %0, i32 %1) {
 
 ; (x0 != 0) || (x1 != 0) || (x2 != 0)
 define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
-; SDISEL-LABEL: true_or3:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    orr w8, w0, w1
-; SDISEL-NEXT:    orr w8, w8, w2
-; SDISEL-NEXT:    cmp w8, #0
-; SDISEL-NEXT:    cset w0, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: true_or3:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    cmp w2, #0
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w0, w8, #0x1
-; GISEL-NEXT:    ret
+; CHECK-LABEL: true_or3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    orr w8, w8, w2
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %4 = icmp ne i32 %0, 0
   %5 = icmp ne i32 %1, 0
   %6 = select i1 %4, i1 true, i1 %5
@@ -242,5 +216,3 @@ define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
   %9 = zext i1 %8 to i32
   ret i32 %9
 }

```

https://github.com/llvm/llvm-project/pull/71949