[PATCH] D128582: Move SeparateConstOffsetFromGEPPass() before LSR() and enable EnableGEPOpt by default.
Shubham Narlawar via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 01:13:33 PDT 2022
gsocshubham updated this revision to Diff 446067.
gsocshubham added a comment.
Updated the patch with full context (generated using `-U9999999`) and addressed the review comments.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D128582/new/
https://reviews.llvm.org/D128582
Files:
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/AArch64/cond-br-tuning.ll
Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll
===================================================================
--- llvm/test/CodeGen/AArch64/cond-br-tuning.ll
+++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll
@@ -27,12 +27,13 @@
define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) {
; CHECK-LABEL: test_add_cbz_multiple_use:
; CHECK: // %bb.0: // %common.ret
-; CHECK-NEXT: adds w8, w0, w1
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w8, #10
; CHECK-NEXT: csel w8, wzr, w8, ne
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: ret
%c = add nsw i32 %a, %b
- %d = icmp ne i32 %c, 0
+ %d = icmp ne i32 %c, 10
br i1 %d, label %L1, label %L2
L1:
store i32 0, i32* %ptr, align 4
Index: llvm/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --debugify-and-strip-all-safe=0 -mtriple=arm64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
; RUN: grep -v "Verify generated machine code" | FileCheck %s
@@ -33,9 +34,20 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Data Prefetch
; CHECK-NEXT: Falkor HW Prefetch Fix
-; CHECK-NEXT: Module Verifier
+; CHECK-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
+; CHECK-NEXT: Early CSE
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Memory SSA
; CHECK-NEXT: Canonicalize natural loops
+; CHECK-NEXT: LCSSA Verifier
+; CHECK-NEXT: Loop-Closed SSA Form Pass
+; CHECK-NEXT: Scalar Evolution Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
+; CHECK-NEXT: Loop Pass Manager
+; CHECK-NEXT: Loop Invariant Code Motion
+; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Loop Pass Manager
; CHECK-NEXT: Canonicalize Freeze Instructions in Loops
; CHECK-NEXT: Induction Variable Users
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -128,7 +128,7 @@
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
- cl::init(false));
+ cl::init(true));
static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
@@ -563,17 +563,6 @@
addPass(createFalkorMarkStridedAccessesPass());
}
- TargetPassConfig::addIRPasses();
-
- addPass(createAArch64StackTaggingPass(
- /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
-
- // Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None) {
- addPass(createInterleavedLoadCombinePass());
- addPass(createInterleavedAccessPass());
- }
-
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
@@ -587,6 +576,17 @@
addPass(createLICMPass());
}
+ TargetPassConfig::addIRPasses();
+
+ addPass(createAArch64StackTaggingPass(
+ /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+
+ // Match interleaved memory accesses to ldN/stN intrinsics.
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createInterleavedLoadCombinePass());
+ addPass(createInterleavedAccessPass());
+ }
+
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D128582.446067.patch
Type: text/x-patch
Size: 4011 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220720/f1c5091b/attachment.bin>
More information about the llvm-commits mailing list