[PATCH] D128582: Enable SeparateConstOffsetFromGEPPass() at -O3 and -O2
Shubham Narlawar via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 25 05:27:53 PDT 2022
gsocshubham created this revision.
gsocshubham added reviewers: momchil.velikov, KyrBoh, fhahn, craig.topper.
Herald added subscribers: StephenFan, hiraditya, arichardson.
Herald added a project: All.
gsocshubham requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
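This patch adds `SeparateConstOffsetFromGEPPass` to the early function pass pipeline (`EarlyFPM`), immediately before `EarlyCSEPass`, at both -O2 and -O3. It also removes the `TTI.useAA()` check in the pass, so that with `LowerGEP` set a GEP is always lowered to arithmetic operations.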
Resolves - https://github.com/llvm/llvm-project/issues/50528
TODO - Add unit tests
%struct = type { i32, i32, i32 }
define i32 @test1(%struct* %ptr, i64 %idx) {
%gep.1 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 1
%lv.1 = load i32, i32* %gep.1
%c = icmp slt i32 %lv.1, 0
br i1 %c, label %then, label %else
then:
%gep.2 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 2
%lv.2 = load i32, i32* %gep.2
%res = add i32 %lv.1, %lv.2
ret i32 %res
else:
ret i32 0
}
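With the patch, each GEP is lowered to `ptrtoint`/`mul`/`add`/`inttoptr` arithmetic, and EarlyCSE then removes the redundant base-address computation (`ptrtoint`, `mul`, `add`) from the second basic block (`then`) -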
%struct = type { i32, i32, i32 }
; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn
define i32 @test1(%struct* %ptr, i64 %idx) local_unnamed_addr #0 {
%1 = ptrtoint %struct* %ptr to i64
%2 = mul i64 %idx, 12
%3 = add i64 %2, %1
%4 = add i64 %3, 4
%5 = inttoptr i64 %4 to i32*
%lv.1 = load i32, i32* %5, align 4
%c = icmp slt i32 %lv.1, 0
br i1 %c, label %then, label %common.ret
common.ret: ; preds = %0, %then
%common.ret.op = phi i32 [ %res, %then ], [ 0, %0 ]
ret i32 %common.ret.op
then: ; preds = %0
%6 = add i64 %3, 8
%7 = inttoptr i64 %6 to i32*
%lv.2 = load i32, i32* %7, align 4
%res = add i32 %lv.2, %lv.1
br label %common.ret
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind readonly willreturn }
for which we get the following AArch64 assembly -
test1: // @test1
// %bb.0:
mov w8, #12
madd x8, x1, x8, x0
ldr w9, [x8, #4]
tbnz w9, #31, .LBB0_2
// %bb.1:
mov w0, wzr
ret
.LBB0_2: // %then
ldr w8, [x8, #8]
add w0, w8, w9
ret
as compared to the original assembly without the patch, where the address computation (`mov`/`madd`) is repeated in `.LBB0_2` -
// %bb.0:
mov w8, #12
madd x8, x1, x8, x0
ldr w8, [x8, #4]
tbnz w8, #31, .LBB0_2
// %bb.1:
mov w0, wzr
ret
.LBB0_2: // %then
mov w9, #12
madd x9, x1, x9, x0
ldr w9, [x9, #8]
add w0, w8, w9
ret
https://reviews.llvm.org/D128582
Files:
llvm/lib/Passes/PassBuilderPipelines.cpp
llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
Index: llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1047,12 +1047,7 @@
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
if (LowerGEP) {
- // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
- // arithmetic operations if the target uses alias analysis in codegen.
- if (TTI.useAA())
- lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
- else
- lowerToArithmetics(GEP, AccumulativeByteOffset);
+ lowerToArithmetics(GEP, AccumulativeByteOffset);
return true;
}
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
@@ -874,6 +875,8 @@
EarlyFPM.addPass(LowerExpectIntrinsicPass());
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROAPass());
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+ EarlyFPM.addPass(SeparateConstOffsetFromGEPPass(true));
EarlyFPM.addPass(EarlyCSEPass());
if (Level == OptimizationLevel::O3)
EarlyFPM.addPass(CallSiteSplittingPass());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D128582.439975.patch
Type: text/x-patch
Size: 1751 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220625/9ddbf09a/attachment.bin>
More information about the llvm-commits mailing list