[llvm] 6314a72 - AMDGPU/GlobalISel: Enable CSE in pre-legalizer combiner

Matt Arsenault via llvm-commits <llvm-commits at lists.llvm.org>
Sun Mar 21 07:21:49 PDT 2021


Author: Matt Arsenault
Date: 2021-03-21T10:07:37-04:00
New Revision: 6314a727308a76b9ef8783d69797ce3bead096ff

URL: https://github.com/llvm/llvm-project/commit/6314a727308a76b9ef8783d69797ce3bead096ff
DIFF: https://github.com/llvm/llvm-project/commit/6314a727308a76b9ef8783d69797ce3bead096ff.diff

LOG: AMDGPU/GlobalISel: Enable CSE in pre-legalizer combiner

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 0ca0ea18551a..c58b15f0eb94 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -249,6 +249,9 @@ void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequired<MachineDominatorTree>();
     AU.addPreserved<MachineDominatorTree>();
   }
+
+  AU.addRequired<GISelCSEAnalysisWrapperPass>();
+  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -270,8 +273,13 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
   AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), KB, MDT);
+  // Enable CSE.
+  GISelCSEAnalysisWrapper &Wrapper =
+      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
+
   Combiner C(PCInfo, TPC);
-  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+  return C.combineMachineInstrs(MF, CSEInfo);
 }
 
 char AMDGPUPreLegalizerCombiner::ID = 0;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
index 93b723c5e730..44ef61c53755 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
@@ -29,10 +29,8 @@ body:             |
 
     ; GCN-LABEL: name: urem_s32_var_const1
     ; GCN: liveins: $vgpr0
-    ; GCN: %const:_(s32) = G_CONSTANT i32 1
-    ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]]
-    ; GCN: $vgpr0 = COPY [[ADD]](s32)
+    ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GCN: $vgpr0 = COPY [[C]](s32)
     %var:_(s32) = COPY $vgpr0
     %const:_(s32) = G_CONSTANT i32 1
     %rem:_(s32) = G_UREM %var, %const
@@ -49,10 +47,8 @@ body:             |
     ; GCN-LABEL: name: urem_s32_var_const2
     ; GCN: liveins: $vgpr0
     ; GCN: %var:_(s32) = COPY $vgpr0
-    ; GCN: %const:_(s32) = G_CONSTANT i32 2
-    ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]]
-    ; GCN: %rem:_(s32) = G_AND %var, [[ADD]]
+    ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GCN: %rem:_(s32) = G_AND %var, [[C]]
     ; GCN: $vgpr0 = COPY %rem(s32)
     %var:_(s32) = COPY $vgpr0
     %const:_(s32) = G_CONSTANT i32 2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 6d4ffa6db73b..f3dc84b12c5e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -207,21 +207,28 @@ define i32 @v_urem_i32_pow2k_denom(i32 %num) {
 ; CHECK-LABEL: v_urem_i32_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_add_i32 s4, 0x1000, -1
-; CHECK-NEXT:    v_and_b32_e32 v0, s4, v0
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xfff, v0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem i32 %num, 4096
   ret i32 %result
 }
 
 define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_add_i32 s4, 0x1000, -1
-; CHECK-NEXT:    v_and_b32_e32 v0, s4, v0
-; CHECK-NEXT:    v_and_b32_e32 v1, s4, v1
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_add_i32 s4, 0x1000, -1
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i32_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0xfff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 6219bc0f19b8..7411807ad755 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -949,38 +949,43 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_urem_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_add_u32 s4, 0x1000, -1
-; CHECK-NEXT:    s_cselect_b32 s5, 1, 0
-; CHECK-NEXT:    s_and_b32 s5, s5, 1
-; CHECK-NEXT:    s_cmp_lg_u32 s5, 0
-; CHECK-NEXT:    s_addc_u32 s5, 0, -1
-; CHECK-NEXT:    v_and_b32_e32 v0, s4, v0
-; CHECK-NEXT:    v_and_b32_e32 v1, s5, v1
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xfff, v0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem i64 %num, 4096
   ret i64 %result
 }
 
 define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
-; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s4, 0x1000
-; CHECK-NEXT:    s_add_u32 s5, s4, -1
-; CHECK-NEXT:    s_cselect_b32 s6, 1, 0
-; CHECK-NEXT:    s_and_b32 s6, s6, 1
-; CHECK-NEXT:    s_cmp_lg_u32 s6, 0
-; CHECK-NEXT:    s_addc_u32 s6, 0, -1
-; CHECK-NEXT:    s_add_u32 s4, s4, -1
-; CHECK-NEXT:    s_cselect_b32 s7, 1, 0
-; CHECK-NEXT:    v_and_b32_e32 v0, s5, v0
-; CHECK-NEXT:    s_and_b32 s5, s7, 1
-; CHECK-NEXT:    v_and_b32_e32 v1, s6, v1
-; CHECK-NEXT:    s_cmp_lg_u32 s5, 0
-; CHECK-NEXT:    s_addc_u32 s5, 0, -1
-; CHECK-NEXT:    v_and_b32_e32 v2, s4, v2
-; CHECK-NEXT:    v_and_b32_e32 v3, s5, v3
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s4, 0x1000
+; GISEL-NEXT:    s_add_u32 s5, s4, -1
+; GISEL-NEXT:    s_cselect_b32 s6, 1, 0
+; GISEL-NEXT:    s_and_b32 s6, s6, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s6, 0
+; GISEL-NEXT:    s_addc_u32 s6, 0, -1
+; GISEL-NEXT:    s_add_u32 s4, s4, -1
+; GISEL-NEXT:    s_cselect_b32 s7, 1, 0
+; GISEL-NEXT:    v_and_b32_e32 v0, s5, v0
+; GISEL-NEXT:    s_and_b32 s5, s7, 1
+; GISEL-NEXT:    v_and_b32_e32 v1, s6, v1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    s_addc_u32 s5, 0, -1
+; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, s5, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0xfff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_mov_b32_e32 v3, 0
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i64> %num, <i64 4096, i64 4096>
   ret <2 x i64> %result
 }


        


More information about the llvm-commits mailing list