[llvm] [IPSCCP] Push constant struct params into callee's (PR #111805)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 02:13:51 PDT 2024
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/111805
This patch pushes constant alloca params into single-use callees. This is beneficial in large functions emitted by Fortran "box passing", which can combine multiple constant alloca parameters into one large alloca, which then results in duplicate, unnecessary constant stores and less clear optimization paths.
This only works for functions whose single use is a CallInst with an alloca argument, where the alloca's only other users are stores of constants and GEPs that are each used by a single store of a constant (i.e. one layer deep).
>From e32c224b7e14a486fd37143faa276720fd11d1d1 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Fri, 27 Sep 2024 12:05:33 +0000
Subject: [PATCH] [IPSCCP] Push constant struct params into callee's
This patch pushes constant alloca params into single-use callees. This is
beneficial in large functions emitted by Fortran "box passing", which can
combine multiple constant alloca parameters into one large alloca, which then
results in duplicate, unnecessary constant stores and less clear optimization
paths.
This only works for functions whose single use is a CallInst with an alloca
argument, where the alloca's only other users are stores of constants and GEPs
that are each used by a single store of a constant (i.e. one layer deep).
---
llvm/lib/Transforms/IPO/SCCP.cpp | 120 ++++++++++++++++++
.../dce-after-argument-promotion.ll | 6 +-
llvm/test/Transforms/SCCP/push_stores.ll | 102 +++++++++++++++
3 files changed, 224 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/SCCP/push_stores.ll
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index e80c6f7c0f49d4..b0d3a0e21480f3 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ModRef.h"
@@ -265,6 +266,125 @@ static bool runIPSCCP(
}
}
+ // If a function has one use, has an alloca parameter, and its caller has
+ // nothing but geps/stores to the alloca, push the alloca definition and all
+ // stores/geps into the callee. For now, rely on argpromotion to clean up the
+ // dead arguments left in the caller.
+ for (auto &F : M) {
+ if (F.hasOneUse() && canTrackArgumentsInterprocedurally(&F)) {
+ CallInst *CI = dyn_cast<CallInst>(*F.user_begin());
+ if (!CI)
+ continue;
+ for (auto &Arg : CI->args()) {
+ auto AI = dyn_cast<AllocaInst>(Arg);
+ if (!AI)
+ continue;
+
+ auto GetAllocaUsers = [&CI](AllocaInst *AI,
+ SmallVector<Value *> &AllocaUsers) -> bool {
+ for (User *U : AI->users()) {
+ if (U == CI)
+ continue;
+
+ auto I = dyn_cast<Instruction>(U);
+ if (!I)
+ continue;
+ switch (I->getOpcode()) {
+ default: {
+ return false;
+ }
+ case Instruction::Store: {
+ auto SI = cast<StoreInst>(U);
+ if (SI->isVolatile() || !isa<Constant>(SI->getValueOperand())) {
+ return false;
+ }
+ AllocaUsers.push_back(SI);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ auto GEP = cast<GetElementPtrInst>(U);
+ auto SI = dyn_cast<StoreInst>(*GEP->users().begin());
+ if (GEP->getNumUses() != 1 || !SI ||
+ !isa<Constant>(SI->getValueOperand())) {
+ return false;
+ }
+ AllocaUsers.push_back(GEP);
+ break;
+ }
+ }
+ }
+ return !AllocaUsers.empty();
+ };
+
+ SmallVector<Value *> AllocaUsers;
+ if (!GetAllocaUsers(AI, AllocaUsers))
+ continue;
+
+ // Copy uses of the Alloca to the callee
+ IRBuilder<> B(&F.getEntryBlock().front());
+ DataLayout DL = AI->getDataLayout();
+ AllocaInst *NewAI =
+ B.CreateAlloca(AI->getAllocatedType(), nullptr, AI->getName());
+ F.getArg(Arg.getOperandNo())->replaceAllUsesWith(NewAI);
+ NewAI->setAlignment(AI->getAlign());
+
+ for (auto U : AllocaUsers) {
+ switch (cast<Instruction>(U)->getOpcode()) {
+ default:
+ llvm_unreachable("Illegal user type in AllocaUsers");
+ case Instruction::Store: {
+ auto SI = cast<StoreInst>(U);
+ auto NewStore = B.CreateStore(SI->getValueOperand(), NewAI);
+ NewStore->setAlignment(SI->getAlign());
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ auto GEP = cast<GetElementPtrInst>(U);
+
+ SmallVector<Value *> GepIndices;
+ for (unsigned i = 0; i < GEP->getNumIndices(); i++)
+ GepIndices.push_back(GEP->getOperand(i + 1));
+
+ GetElementPtrInst *NewGep = cast<GetElementPtrInst>(
+ B.CreateGEP(GEP->getSourceElementType(), NewAI, GepIndices));
+ NewGep->setNoWrapFlags(GEP->getNoWrapFlags());
+
+ auto SI = cast<StoreInst>(*GEP->users().begin());
+ auto NewStore = B.CreateStore(SI->getValueOperand(), NewGep);
+ NewStore->setAlignment(SI->getAlign());
+ }
+ }
+ }
+
+ // Remove old uses of the Alloca in the caller
+ while (!AllocaUsers.empty()) {
+ Instruction *I = cast<Instruction>(AllocaUsers.pop_back_val());
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Illegal user type when removing Alloca users");
+ case Instruction::Store: {
+ I->removeFromParent();
+ I->deleteValue();
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ auto SI = cast<Instruction>(*I->users().begin());
+ SI->removeFromParent();
+ SI->deleteValue();
+ I->removeFromParent();
+ I->deleteValue();
+ }
+ }
+ }
+ MadeChanges = true;
+
+ // TODO:
+ // - delete dead params here instead of relying on argpromotion
+ // - remove empty alloca instruction
+ }
+ }
+ }
+
// If we inferred constant or undef return values for a function, we replaced
// all call uses with the inferred value. This means we don't need to bother
// actually returning anything from the function. Replace all return
diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
index c33fcfbe6ed973..67e2d4e5f8a87f 100644
--- a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
@@ -9,10 +9,8 @@
define internal void @f(ptr byval(%struct.ss) align 8 %b, ptr byval(i32) align 4 %X) noinline nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]]){{[^#]*}} #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
-; CHECK-NEXT: store i32 [[TEMP]], ptr [[DUMMY]], align 4
+; CHECK-NEXT: store i32 2, ptr [[DUMMY]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -27,7 +25,7 @@ define i32 @test(ptr %X) {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (ptr {{[^%]*}} [[X:%.*]]){{[^#]*}} #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call {{.*}}void @f(i32 1)
+; CHECK-NEXT: tail call {{.*}}void @f()
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/SCCP/push_stores.ll b/llvm/test/Transforms/SCCP/push_stores.ll
new file mode 100644
index 00000000000000..a5265c7f85cff7
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/push_stores.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=ipsccp,argpromotion < %s | FileCheck %s
+
+@sudoku0 = internal global [9 x [9 x i32]] zeroinitializer
+@sudoku1 = internal global [9 x [9 x i32]] zeroinitializer
+
+declare void @callee2(ptr nocapture nonnull readonly %0)
+
+define internal i64 @callee(ptr nocapture readonly %0, ptr nocapture readonly %1) local_unnamed_addr {
+; CHECK-LABEL: define internal i64 @callee() local_unnamed_addr {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca { ptr, i64 }, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT: store i64 4, ptr [[TMP2]], align 8
+; CHECK-NEXT: store ptr @sudoku1, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = alloca { ptr, i64 }, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT: store i64 4, ptr [[TMP4]], align 8
+; CHECK-NEXT: store ptr @sudoku0, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[MEGASTRUCT:%.*]] = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }, align 8
+; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[GEP_0_1:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[GEP_0_1]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[GEP_1_1]], align 8
+; CHECK-NEXT: store ptr [[ALLOCA0]], ptr [[MEGASTRUCT]], align 8
+; CHECK-NEXT: [[MEGASTRUCT_GEP_1:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 8
+; CHECK-NEXT: store ptr [[ALLOCA1]], ptr [[MEGASTRUCT_GEP_1]], align 8
+; CHECK-NEXT: [[MEGASTRUCT_GEP_2:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 16
+; CHECK-NEXT: store ptr [[TMP5]], ptr [[MEGASTRUCT_GEP_2]], align 8
+; CHECK-NEXT: [[MEGASTRUCT_GEP_3:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 24
+; CHECK-NEXT: store i64 [[TMP6]], ptr [[MEGASTRUCT_GEP_3]], align 8
+; CHECK-NEXT: [[MEGASTRUCT_GEP_4:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 32
+; CHECK-NEXT: store ptr [[TMP7]], ptr [[MEGASTRUCT_GEP_4]], align 8
+; CHECK-NEXT: [[MEGASTRUCT_GEP_5:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 40
+; CHECK-NEXT: store i64 [[TMP8]], ptr [[MEGASTRUCT_GEP_5]], align 8
+; CHECK-NEXT: call fastcc void @callee2(ptr [[MEGASTRUCT]])
+; CHECK-NEXT: ret i64 poison
+;
+ %megastruct = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }
+
+ %alloca0 = alloca i32, align 4
+ %alloca1 = alloca i32, align 4
+
+ %3 = load ptr, ptr %0, align 8
+ %gep.0.1 = getelementptr inbounds i8, ptr %0, i64 8
+ %4 = load i64, ptr %gep.0.1, align 8
+
+ %5 = load ptr, ptr %0, align 8
+ %gep.1.1 = getelementptr inbounds i8, ptr %1, i64 8
+ %6 = load i64, ptr %gep.1.1, align 8
+
+ store ptr %alloca0, ptr %megastruct, align 8
+ %megastruct.gep.1 = getelementptr inbounds i8, ptr %megastruct, i64 8
+ store ptr %alloca1, ptr %megastruct.gep.1, align 8
+
+ %megastruct.gep.2 = getelementptr inbounds i8, ptr %megastruct, i64 16
+ store ptr %3, ptr %megastruct.gep.2, align 8
+
+ %megastruct.gep.3 = getelementptr inbounds i8, ptr %megastruct, i64 24
+ store i64 %4, ptr %megastruct.gep.3, align 8
+
+ %megastruct.gep.4 = getelementptr inbounds i8, ptr %megastruct, i64 32
+ store ptr %5, ptr %megastruct.gep.4, align 8
+
+ %megastruct.gep.5 = getelementptr inbounds i8, ptr %megastruct, i64 40
+ store i64 %6, ptr %megastruct.gep.5, align 8
+
+ call fastcc void @callee2(ptr %megastruct)
+ ret i64 1
+}
+
+define i64 @caller() local_unnamed_addr {
+; CHECK-LABEL: define i64 @caller() local_unnamed_addr {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee()
+; CHECK-NEXT: ret i64 1
+;
+ %1 = alloca { ptr, i64 }, align 8
+ %2 = alloca { ptr, i64 }, align 8
+
+ store ptr @sudoku0, ptr %1, align 8
+ %.gep0 = getelementptr inbounds i8, ptr %1, i64 8
+ store i64 4, ptr %.gep0, align 8
+
+ store ptr @sudoku1, ptr %2, align 8
+ %.gep1 = getelementptr inbounds i8, ptr %2, i64 8
+ store i64 4, ptr %.gep1, align 8
+
+ %p = call i64 @callee(ptr nonnull %1, ptr nonnull %2)
+ ret i64 %p
+}
+
+define i64 @m() local_unnamed_addr {
+; CHECK-LABEL: define i64 @m() local_unnamed_addr {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @caller()
+; CHECK-NEXT: ret i64 1
+;
+ %1 = call i64 @caller()
+ ret i64 %1
+}
More information about the llvm-commits
mailing list