[PATCH] D128582: Enable SeparateConstOffsetFromGEPPass() at -O3 and -O2

Sat Jun 25 05:27:53 PDT 2022

gsocshubham created this revision.
gsocshubham added reviewers: momchil.velikov, KyrBoh, fhahn, craig.topper.
Herald added subscribers: StephenFan, hiraditya, arichardson.
Herald added a project: All.
gsocshubham requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

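This patch adds `SeparateConstOffsetFromGEPPass` to the early function pass pipeline (`EarlyFPM`), right before `EarlyCSEPass`, at both -O2 and -O3. It also makes the pass always lower GEPs to arithmetic operations when lowering is requested, removing the `TTI.useAA()` check that previously selected single-index GEPs instead.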

Resolves https://github.com/llvm/llvm-project/issues/50528

TODO - Add unit tests

  %struct = type { i32, i32, i32 }
  
  define i32 @test1(%struct* %ptr, i64 %idx) {
   %gep.1 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 1
   %lv.1 = load i32, i32* %gep.1
   %c = icmp slt i32 %lv.1, 0
   br i1 %c, label %then, label %else
  
  then:
   %gep.2 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 2
   %lv.2 = load i32, i32* %gep.2
   %res = add i32 %lv.1, %lv.2
   ret i32 %res
  
  
  else:
   ret i32 0
  }

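With this patch, both GEPs in the input above are lowered to explicit address arithmetic, and EarlyCSE then removes the redundant base-address computation (`ptrtoint`, `mul`, `add`) from the `then` block, which reuses `%3` instead: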

  %struct = type { i32, i32, i32 }
  
  ; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn
  define i32 @test1(%struct* %ptr, i64 %idx) local_unnamed_addr #0 {
    %1 = ptrtoint %struct* %ptr to i64
    %2 = mul i64 %idx, 12
    %3 = add i64 %2, %1
    %4 = add i64 %3, 4
    %5 = inttoptr i64 %4 to i32*
    %lv.1 = load i32, i32* %5, align 4
    %c = icmp slt i32 %lv.1, 0
    br i1 %c, label %then, label %common.ret
  
  common.ret:                                       ; preds = %0, %then
    %common.ret.op = phi i32 [ %res, %then ], [ 0, %0 ]
    ret i32 %common.ret.op
  
  then:                                             ; preds = %0
    %6 = add i64 %3, 8
    %7 = inttoptr i64 %6 to i32*
    %lv.2 = load i32, i32* %7, align 4
    %res = add i32 %lv.2, %lv.1
    br label %common.ret
  }
  
  attributes #0 = { mustprogress nofree norecurse nosync nounwind readonly willreturn }

For this IR we get the following AArch64 assembly:

  test1:                                  // @test1
  // %bb.0:
          mov     w8, #12
          madd    x8, x1, x8, x0
          ldr     w9, [x8, #4]
          tbnz    w9, #31, .LBB0_2
  // %bb.1:
          mov     w0, wzr
          ret
  .LBB0_2:                                // %then
          ldr     w8, [x8, #8]
          add     w0, w8, w9
          ret

Compare this with the original assembly generated without the patch, which recomputes the address (`mov` + `madd`) in `.LBB0_2`:

  // %bb.0:
          mov     w8, #12
          madd    x8, x1, x8, x0
          ldr     w8, [x8, #4]
          tbnz    w8, #31, .LBB0_2
  // %bb.1:
          mov     w0, wzr
          ret
  .LBB0_2:                                // %then
          mov     w9, #12
          madd    x9, x1, x9, x0
          ldr     w9, [x9, #8]
          add     w0, w8, w9
          ret


https://reviews.llvm.org/D128582

Files:
  llvm/lib/Passes/PassBuilderPipelines.cpp
  llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp


Index: llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================

--- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1047,12 +1047,7 @@
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
-    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
-    // arithmetic operations if the target uses alias analysis in codegen.
-    if (TTI.useAA())
-      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
-    else
-      lowerToArithmetics(GEP, AccumulativeByteOffset);
+    lowerToArithmetics(GEP, AccumulativeByteOffset);
     return true;
   }
 
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Scalar/Reassociate.h"
 #include "llvm/Transforms/Scalar/SCCP.h"
 #include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
@@ -874,6 +875,8 @@
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
   EarlyFPM.addPass(SimplifyCFGPass());
   EarlyFPM.addPass(SROAPass());
+  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+    EarlyFPM.addPass(SeparateConstOffsetFromGEPPass(true));
   EarlyFPM.addPass(EarlyCSEPass());
   if (Level == OptimizationLevel::O3)
     EarlyFPM.addPass(CallSiteSplittingPass());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D128582.439975.patch
Type: text/x-patch
Size: 1751 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220625/9ddbf09a/attachment.bin>