[llvm] 712d35b - GlobalISel: Fold some idempotent operations

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 16 07:44:51 PST 2022


Author: Matt Arsenault
Date: 2022-11-16T07:44:46-08:00
New Revision: 712d35b417dd1beb373c51a9076db18811433d59

URL: https://github.com/llvm/llvm-project/commit/712d35b417dd1beb373c51a9076db18811433d59
DIFF: https://github.com/llvm/llvm-project/commit/712d35b417dd1beb373c51a9076db18811433d59.diff

LOG: GlobalISel: Fold some idempotent operations

This makes the existing fabs_fabs fold redundant, which
I thought was using more tablegen matching, but apparently not.
I'm not sure how to make match work with multiple opcodes. There
are a few more this could handle, but these are the ones that
legalization is more likely to introduce.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir

Modified: 
    llvm/include/llvm/Target/GlobalISel/Combine.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index dd5d929e615c0..a737b1cd15063 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -131,6 +131,18 @@ def copy_prop : GICombineRule<
          [{ return Helper.matchCombineCopy(*${mi}); }]),
   (apply [{ Helper.applyCombineCopy(*${mi}); }])>;
 
+// idempotent operations
+// Fold (freeze (freeze x)) -> (freeze x).
+// Fold (fabs (fabs x)) -> (fabs x).
+// Fold (fcanonicalize (fcanonicalize x)) -> (fcanonicalize x).
+def idempotent_prop : GICombineRule<
+   (defs root:$mi),
+   (match (wip_match_opcode G_FREEZE, G_FABS, G_FCANONICALIZE):$mi,
+          [{ return MRI.getVRegDef(${mi}->getOperand(1).getReg())->getOpcode() ==
+                    ${mi}->getOpcode(); }]),
+   (apply [{ Helper.replaceSingleDefInstWithOperand(*${mi}, 1); }])>;
+
+
 def extending_loads : GICombineRule<
   (defs root:$root, extending_load_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root,
@@ -565,15 +577,6 @@ def merge_unmerge : GICombineRule<
   (apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${matchinfo}); }])
 >;
 
-// Fold (fabs (fabs x)) -> (fabs x).
-def fabs_fabs_fold: GICombineRule<
-  (defs root:$dst, register_matchinfo:$matchinfo),
-  (match (G_FABS $abs, $src),
-         (G_FABS $dst, $abs):$mi,
-         [{ ${matchinfo} = ${abs}.getReg(); }]),
-  (apply [{ return Helper.replaceSingleDefInstWithReg(*${mi}, ${matchinfo}); }])
->;
-
 // Fold (fabs (fneg x)) -> (fabs x).
 def fabs_fneg_fold: GICombineRule <
   (defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1038,7 +1041,7 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
                                       select_to_logical]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
-                                       mul_by_neg_one]>;
+                                       mul_by_neg_one, idempotent_prop]>;
 
 def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
   combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
@@ -1055,7 +1058,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
     not_cmp_fold, opt_brcond_by_inverting_cond,
-    unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
+    unmerge_merge, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir
new file mode 100644
index 0000000000000..cd3c0053d3784
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-fcanonicalize.mir
@@ -0,0 +1,96 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: test_fcanonicalize_fcanonicalize_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_fcanonicalize_fcanonicalize_fcanonicalize_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_fcanonicalize_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0
+    %fcanonicalize2:_(s32) = G_FCANONICALIZE %fcanonicalize1
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %fcanonicalize2, %ptr :: (store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_fcanonicalize_fcanonicalize_s32_multi_use
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fcanonicalize_fcanonicalize_s32_multi_use
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %ptr:_(p1) = G_IMPLICIT_DEF
+
+    %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    G_STORE %fcanonicalize0, %ptr :: (volatile store (s32), addrspace 1, align 4)
+
+    %fcanonicalize1:_(s32) = G_FCANONICALIZE %fcanonicalize0
+    G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4)
+
+    G_STORE %fcanonicalize1, %ptr :: (volatile store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_fcanonicalize_copy_fcanonicalize_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fcanonicalize_copy_fcanonicalize_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %fcanonicalize0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %fcanonicalize0:_(s32) = G_FCANONICALIZE %src0
+    %copy:_(s32) = COPY %fcanonicalize0
+    %fcanonicalize1:_(s32) = G_FCANONICALIZE %copy
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %fcanonicalize1, %ptr :: (store (s32), addrspace 1, align 4)
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir
new file mode 100644
index 0000000000000..9f6d85baddc32
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-freeze.mir
@@ -0,0 +1,96 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: test_freeze_freeze_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_freeze_freeze_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %freeze0:_(s32) = G_FREEZE %src0
+    %freeze1:_(s32) = G_FREEZE %freeze0
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_freeze_freeze_freeze_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_freeze_freeze_freeze_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %freeze0:_(s32) = G_FREEZE %src0
+    %freeze1:_(s32) = G_FREEZE %freeze0
+    %freeze2:_(s32) = G_FREEZE %freeze1
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %freeze2, %ptr :: (store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_freeze_freeze_s32_multi_use
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_freeze_freeze_s32_multi_use
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (volatile store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %ptr:_(p1) = G_IMPLICIT_DEF
+
+    %freeze0:_(s32) = G_FREEZE %src0
+    G_STORE %freeze0, %ptr :: (volatile store (s32), addrspace 1, align 4)
+
+    %freeze1:_(s32) = G_FREEZE %freeze0
+    G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4)
+
+    G_STORE %freeze1, %ptr :: (volatile store (s32), addrspace 1, align 4)
+...
+
+---
+name: test_freeze_copy_freeze_s32
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_freeze_copy_freeze_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src0:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %freeze0:_(s32) = G_FREEZE %src0
+    ; CHECK-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: G_STORE %freeze0(s32), %ptr(p1) :: (store (s32), addrspace 1)
+    %src0:_(s32) = COPY $vgpr0
+    %freeze0:_(s32) = G_FREEZE %src0
+    %copy:_(s32) = COPY %freeze0
+    %freeze1:_(s32) = G_FREEZE %copy
+    %ptr:_(p1) = G_IMPLICIT_DEF
+    G_STORE %freeze1, %ptr :: (store (s32), addrspace 1, align 4)
+...


        


More information about the llvm-commits mailing list