[llvm] 170c4d2 - [ArgPromotion] Unify byval promotion with non-byval
Pavel Samolysov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 28 05:23:11 PDT 2022
Author: Pavel Samolysov
Date: 2022-06-28T15:19:58+03:00
New Revision: 170c4d21bd94d4f183c2fec1dd7d261360df7bae
URL: https://github.com/llvm/llvm-project/commit/170c4d21bd94d4f183c2fec1dd7d261360df7bae
DIFF: https://github.com/llvm/llvm-project/commit/170c4d21bd94d4f183c2fec1dd7d261360df7bae.diff
LOG: [ArgPromotion] Unify byval promotion with non-byval
It makes sense to handle byval promotion in the same way as non-byval
promotion, while additionally allowing `store` instructions. However,
stores should pass the same checks as the `load` instructions do, i.e. be
part of the `ArgsToPromote` collection; for these byval arguments, though,
the check for interfering modifications can be disabled. The promotion
algorithm itself has been reworked substantially: all accesses (i.e. loads
and stores) are rewritten to target the newly emitted `alloca`
instructions. To optimize these new `alloca`s away, the `PromoteMemToReg`
function from `Transforms/Utils/PromoteMemoryToRegister.cpp` is invoked
after promotion.
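For illustration, here is a minimal sketch of the new scheme (the function
and value names are invented for this example, not taken from the patch;
the `.val`/`.allc` suffixes follow the pass's naming). A byval callee such
as

  %struct.pair = type { i32, i32 }

  define internal void @callee(%struct.pair* byval(%struct.pair) align 4 %p) {
  entry:
    %f0 = getelementptr %struct.pair, %struct.pair* %p, i32 0, i32 0
    %v = load i32, i32* %f0, align 4
    %inc = add i32 %v, 1
    store i32 %inc, i32* %f0, align 4
    ret void
  }

is first rewritten so that the promoted part is passed by value and spilled
into a fresh alloca, to which both the load and the store are retargeted:

  define internal void @callee(i32 %p.0.val) {
  entry:
    %p.0.allc = alloca i32, align 4
    store i32 %p.0.val, i32* %p.0.allc, align 4
    %v = load i32, i32* %p.0.allc, align 4
    %inc = add i32 %v, 1
    store i32 %inc, i32* %p.0.allc, align 4
    ret void
  }

`PromoteMemToReg` then folds `%p.0.allc` away, leaving pure SSA arithmetic
on `%p.0.val`, as the updated byval.ll checks below show.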
In order to let `PromoteMemToReg` promote as many `alloca`s as possible,
there should be no `GEP`s into them. To eliminate the `GEP`s, a separate
`alloca` is generated for every argument part, because a single `alloca`
for the whole argument (which would significantly simplify the code of the
pass) unfortunately cannot be used, as the sketch below illustrates.
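A hedged sketch of the per-part layout (the offsets and alignments are
illustrative and depend on the target's data layout): for a byval
`{ i32, i64 }` argument accessed at byte offsets 0 and 4, the pass emits

  %b.0.allc = alloca i32, align 4
  store i32 %b.0.val, i32* %b.0.allc, align 4
  %b.4.allc = alloca i64, align 4
  store i64 %b.4.val, i64* %b.4.allc, align 4

instead of a single `alloca %struct.ss`, which could only be addressed
through `GEP`s such as `getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 1`;
those `GEP`s would prevent `PromoteMemToReg` from promoting the alloca.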
The idea comes from the following discussion:
https://reviews.llvm.org/D124514#3479676
Differential Revision: https://reviews.llvm.org/D125485
Added:
llvm/test/Transforms/ArgumentPromotion/byval-with-padding.ll
llvm/test/Transforms/ArgumentPromotion/store-after-load.ll
llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
Modified:
llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
llvm/test/Transforms/ArgumentPromotion/attrs.ll
llvm/test/Transforms/ArgumentPromotion/byval-2.ll
llvm/test/Transforms/ArgumentPromotion/byval.ll
llvm/test/Transforms/ArgumentPromotion/dbg.ll
llvm/test/Transforms/ArgumentPromotion/fp80.ll
llvm/test/Transforms/ArgumentPromotion/metadata.ll
Removed:
llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 35481843c0e3e..ac08b6c8877ac 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -28,6 +28,8 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}
/// Checks if a type could have padding bytes.
+ // TODO: the function isn't used in the ArgumentPromotionPass anymore and
+ // should be moved into AttributorAttributes.cpp, its single known user.
static bool isDenselyPacked(Type *Ty, const DataLayout &DL);
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 9470d6641b364..1836fd774606a 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,6 +29,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -56,6 +57,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -75,6 +77,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -86,7 +89,6 @@ using namespace llvm;
#define DEBUG_TYPE "argpromotion"
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
-STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
namespace {
@@ -94,9 +96,9 @@ namespace {
struct ArgPart {
Type *Ty;
Align Alignment;
- /// A representative guaranteed-executed load instruction for use by
+ /// A representative guaranteed-executed load or store instruction for use by
/// metadata transfer.
- LoadInst *MustExecLoad;
+ Instruction *MustExecInstr;
};
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
@@ -154,9 +156,9 @@ static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *doPromotion(
- Function *F,
+ Function *F, function_ref<DominatorTree &(Function &F)> DTGetter,
+ function_ref<AssumptionCache *(Function &F)> ACGetter,
const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
ReplaceCallSite) {
// Start by computing a new prototype for the function, which is the same as
@@ -174,15 +176,7 @@ static Function *doPromotion(
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgNo) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- llvm::append_range(Params, STy->elements());
- ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
- AttributeSet());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
+ if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
@@ -250,29 +244,10 @@ static Function *doPromotion(
auto *AI = CB.arg_begin();
ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++AI, ++ArgNo)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ ++I, ++AI, ++ArgNo) {
+ if (!ArgsToPromote.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
- Align StructAlign = *I->getParamAlign();
- for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J);
- auto *Idx =
- IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(J));
- // TODO: Tell AA about the new values?
- Align Alignment =
- commonAlignment(StructAlign, SL->getElementOffset(J));
- Args.push_back(IRB.CreateAlignedLoad(
- STy->getElementType(J), Idx, Alignment, Idx->getName() + ".val"));
- ArgAttrVec.push_back(AttributeSet());
- }
} else if (!I->use_empty()) {
Value *V = *AI;
const auto &ArgParts = ArgsToPromote.find(&*I)->second;
@@ -281,9 +256,9 @@ static Function *doPromotion(
Pair.second.Ty,
createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
Pair.second.Alignment, V->getName() + ".val");
- if (Pair.second.MustExecLoad) {
- LI->setAAMetadata(Pair.second.MustExecLoad->getAAMetadata());
- LI->copyMetadata(*Pair.second.MustExecLoad,
+ if (Pair.second.MustExecInstr) {
+ LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
+ LI->copyMetadata(*Pair.second.MustExecInstr,
{LLVMContext::MD_range, LLVMContext::MD_nonnull,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
@@ -293,6 +268,7 @@ static Function *doPromotion(
ArgAttrVec.push_back(AttributeSet());
}
}
+ }
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
@@ -342,11 +318,15 @@ static Function *doPromotion(
// function empty.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ // We will collect all the newly created allocas to promote them into
+ // registers after the following loop.
+ SmallVector<AllocaInst *, 4> Allocas;
+
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
Function::arg_iterator I2 = NF->arg_begin();
for (Argument &Arg : F->args()) {
- if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
+ if (!ArgsToPromote.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
Arg.replaceAllUsesWith(&*I2);
@@ -355,37 +335,6 @@ static Function *doPromotion(
continue;
}
- if (ByValArgsToTransform.count(&Arg)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
-
- // Just add all the struct element types.
- Type *AgTy = Arg.getParamByValType();
- Align StructAlign = *Arg.getParamAlign();
- Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
- StructAlign, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
- nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
-
- for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(J),
- InsertPt);
- I2->setName(Arg.getName() + "." + Twine(J));
- Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(J));
- new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
- }
-
- // Anything that used the arg should now use the alloca.
- Arg.replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&Arg);
- continue;
- }
-
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
@@ -394,16 +343,45 @@ static Function *doPromotion(
if (Arg.use_empty())
continue;
- SmallDenseMap<int64_t, Argument *> OffsetToArg;
+ // Otherwise, if we promoted this argument, we have to create an alloca in
+ // the callee for every promotable part and store each of the new incoming
+ // arguments into the corresponding alloca, which gives the old code (in
+ // particular the store instructions, if they are allowed) a chance to work
+ // as before.
+ assert(Arg.getType()->isPointerTy() &&
+ "Only arguments with a pointer type are promotable");
+
+ IRBuilder<NoFolder> IRB(&NF->begin()->front());
+
+ // Add only the promoted elements, i.e. the parts from ArgsToPromote.
+ SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
- Argument &NewArg = *I2++;
- NewArg.setName(Arg.getName() + "." + Twine(Pair.first) + ".val");
- OffsetToArg.insert({Pair.first, &NewArg});
+ int64_t Offset = Pair.first;
+ const ArgPart &Part = Pair.second;
+
+ Argument *NewArg = I2++;
+ NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
+
+ AllocaInst *NewAlloca = IRB.CreateAlloca(
+ Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
+ NewAlloca->setAlignment(Pair.second.Alignment);
+ IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
+
+ // Collect the alloca to retarget the users to
+ OffsetToAlloca.insert({Offset, NewAlloca});
}
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (with possible casts and GEPs in between).
+ auto GetAlloca = [&](Value *Ptr) {
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ true);
+ assert(Ptr == &Arg && "Not constant offset from arg?");
+ return OffsetToAlloca.lookup(Offset.getSExtValue());
+ };
+ // Clean up dead instructions: the GEPs and BitCasts between the original
+ // argument and its users (loads and stores). Retarget every user to the
+ // newly created alloca.
SmallVector<Value *, 16> Worklist;
SmallVector<Instruction *, 16> DeadInsts;
append_range(Worklist, Arg.users());
@@ -417,13 +395,14 @@ static Function *doPromotion(
if (auto *LI = dyn_cast<LoadInst>(V)) {
Value *Ptr = LI->getPointerOperand();
- APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
- Ptr =
- Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
- /* AllowNonInbounds */ true);
- assert(Ptr == &Arg && "Not constant offset from arg?");
- LI->replaceAllUsesWith(OffsetToArg[Offset.getSExtValue()]);
- DeadInsts.push_back(LI);
+ LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
+ continue;
+ }
+
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
+ Value *Ptr = SI->getPointerOperand();
+ SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
continue;
}
@@ -434,6 +413,23 @@ static Function *doPromotion(
I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
+
+ // Collect the allocas for promotion
+ for (const auto &Pair : OffsetToAlloca) {
+ assert(isAllocaPromotable(Pair.second) &&
+ "By design, only promotable allocas should be produced.");
+ Allocas.push_back(Pair.second);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
+ << " alloca(s) are promotable by Mem2Reg\n");
+
+ if (!Allocas.empty()) {
+ // And now we are able to promote the allocas into registers. Our earlier
+ // checks have ensured that PromoteMemToReg() will succeed.
+ PromoteMemToReg(Allocas, DTGetter(*NF), ACGetter(*NF));
}
return NF;
@@ -456,8 +452,8 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg,
// direct callees.
return all_of(Callee->users(), [&](User *U) {
CallBase &CB = cast<CallBase>(*U);
- return isDereferenceableAndAlignedPointer(
- CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
+ return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
+ NeededAlign, Bytes, DL);
});
}
@@ -470,7 +466,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
if (Arg->use_empty())
return true;
- // We can only promote this argument if all of the uses are loads at known
+ // We can only promote this argument if all the uses are loads at known
// offsets.
//
// Promoting the argument causes it to be loaded in the caller
@@ -487,15 +483,22 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
Align NeededAlign(1);
uint64_t NeededDerefBytes = 0;
- // Returns None if this load is not based on the argument. Return true if
- // we can promote the load, false otherwise.
- auto HandleLoad = [&](LoadInst *LI,
- bool GuaranteedToExecute) -> Optional<bool> {
- // Don't promote volatile or atomic loads.
- if (!LI->isSimple())
+ // If this is a byval argument, we also allow store instructions. Only
+ // handle arguments with specified alignment this way; if the alignment
+ // is unspecified, the actual alignment of the argument is
+ // target-specific.
+ bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
+
+ // An end user of a pointer argument is a load or store instruction.
+ // Returns None if this load or store is not based on the argument, true
+ // if we can promote the instruction, and false otherwise.
+ auto HandleEndUser = [&](auto *I, Type *Ty,
+ bool GuaranteedToExecute) -> Optional<bool> {
+ // Don't promote volatile or atomic instructions.
+ if (!I->isSimple())
return false;
- Value *Ptr = LI->getPointerOperand();
+ Value *Ptr = I->getPointerOperand();
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
/* AllowNonInbounds */ true);
@@ -505,7 +508,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
if (Offset.getSignificantBits() >= 64)
return false;
- Type *Ty = LI->getType();
TypeSize Size = DL.getTypeStoreSize(Ty);
// Don't try to promote scalable types.
if (Size.isScalable())
@@ -518,7 +520,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
int64_t Off = Offset.getSExtValue();
auto Pair = ArgParts.try_emplace(
- Off, ArgPart{Ty, LI->getAlign(), GuaranteedToExecute ? LI : nullptr});
+ Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
ArgPart &Part = Pair.first->second;
bool OffsetNotSeenBefore = Pair.second;
@@ -530,44 +532,49 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
return false;
}
- // For now, we only support loading one specific type at a given offset.
+ // For now, we only support loading/storing one specific type at a given
+ // offset.
if (Part.Ty != Ty) {
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
- << "loaded via both " << *Part.Ty << " and " << *Ty
+ << "accessed as both " << *Part.Ty << " and " << *Ty
<< " at offset " << Off << "\n");
return false;
}
- // If this load is not guaranteed to execute, and we haven't seen a load at
- // this offset before (or it had lower alignment), then we need to remember
- // that requirement.
- // Note that skipping loads of previously seen offsets is only correct
- // because we only allow a single type for a given offset, which also means
- // that the number of accessed bytes will be the same.
+ // If this instruction is not guaranteed to execute, and we haven't seen a
+ // load or store at this offset before (or it had lower alignment), then we
+ // need to remember that requirement.
+ // Note that skipping instructions of previously seen offsets is only
+ // correct because we only allow a single type for a given offset, which
+ // also means that the number of accessed bytes will be the same.
if (!GuaranteedToExecute &&
- (OffsetNotSeenBefore || Part.Alignment < LI->getAlign())) {
+ (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
// We won't be able to prove dereferenceability for negative offsets.
if (Off < 0)
return false;
// If the offset is not aligned, an aligned base pointer won't help.
- if (!isAligned(LI->getAlign(), Off))
+ if (!isAligned(I->getAlign(), Off))
return false;
NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
- NeededAlign = std::max(NeededAlign, LI->getAlign());
+ NeededAlign = std::max(NeededAlign, I->getAlign());
}
- Part.Alignment = std::max(Part.Alignment, LI->getAlign());
+ Part.Alignment = std::max(Part.Alignment, I->getAlign());
return true;
};
- // Look for loads that are guaranteed to execute on entry.
+ // Look for loads and stores that are guaranteed to execute on entry.
for (Instruction &I : Arg->getParent()->getEntryBlock()) {
+ Optional<bool> Res{};
if (LoadInst *LI = dyn_cast<LoadInst>(&I))
- if (Optional<bool> Res = HandleLoad(LI, /* GuaranteedToExecute */ true))
- if (!*Res)
- return false;
+ Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ true);
+ if (Res && !*Res)
+ return false;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
@@ -575,36 +582,49 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
// Now look at all loads of the argument. Remember the load instructions
// for the aliasing check below.
- SmallVector<Value *, 16> Worklist;
- SmallPtrSet<Value *, 16> Visited;
+ SmallVector<const Use *, 16> Worklist;
+ SmallPtrSet<const Use *, 16> Visited;
SmallVector<LoadInst *, 16> Loads;
- auto AppendUsers = [&](Value *V) {
- for (User *U : V->users())
- if (Visited.insert(U).second)
- Worklist.push_back(U);
+ auto AppendUses = [&](const Value *V) {
+ for (const Use &U : V->uses())
+ if (Visited.insert(&U).second)
+ Worklist.push_back(&U);
};
- AppendUsers(Arg);
+ AppendUses(Arg);
while (!Worklist.empty()) {
- Value *V = Worklist.pop_back_val();
+ const Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
if (isa<BitCastInst>(V)) {
- AppendUsers(V);
+ AppendUses(V);
continue;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
if (!GEP->hasAllConstantIndices())
return false;
- AppendUsers(V);
+ AppendUses(V);
continue;
}
if (auto *LI = dyn_cast<LoadInst>(V)) {
- if (!*HandleLoad(LI, /* GuaranteedToExecute */ false))
+ if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
return false;
Loads.push_back(LI);
continue;
}
+ // Stores are allowed for byval arguments
+ auto *SI = dyn_cast<StoreInst>(V);
+ if (AreStoresAllowed && SI &&
+ U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
+ if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ false))
+ return false;
+ continue;
+ // Only stores TO the argument are allowed; all other stores are
+ // unknown users.
+ }
+
// Unknown user.
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
<< "unknown user " << *V << "\n");
@@ -630,8 +650,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
[](const auto &A, const auto &B) { return A.first < B.first; });
// Make sure the parts are non-overlapping.
- // TODO: As we're doing pure load promotion here, overlap should be fine from
- // a correctness perspective. Profitability is less obvious though.
int64_t Offset = ArgPartsVec[0].first;
for (const auto &Pair : ArgPartsVec) {
if (Pair.first < Offset)
@@ -640,6 +658,12 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
}
+ // If store instructions are allowed, the path from the entry of the
+ // function to each load may contain instructions that potentially
+ // invalidate the load, and this is an admissible situation.
+ if (AreStoresAllowed)
+ return true;
+
// Okay, now we know that the argument is only used by load instructions, and
// it is safe to unconditionally perform all of them. Use alias analysis to
// check to see if the pointer is guaranteed to not be modified from entry of
@@ -712,40 +736,6 @@ bool ArgumentPromotionPass::isDenselyPacked(Type *Ty, const DataLayout &DL) {
return true;
}
-/// Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *Arg) {
- assert(Arg->hasByValAttr());
-
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(Arg);
-
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
-
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList(Arg->users());
- while (!WorkList.empty()) {
- Value *V = WorkList.pop_back_val();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- append_range(WorkList, V->users());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
- }
- }
-
- // Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
-
- return false;
-}
-
/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
@@ -767,6 +757,8 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
/// calls the DoPromotion method.
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
+ function_ref<DominatorTree &(Function &F)> DTGetter,
+ function_ref<AssumptionCache *(Function &F)> ACGetter,
unsigned MaxElements,
Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
ReplaceCallSite,
@@ -774,7 +766,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Don't perform argument promotion for naked functions; otherwise we can end
// up removing parameters that are seemingly 'not used' as they are referred
// to in the assembly.
- if(F->hasFnAttribute(Attribute::Naked))
+ if (F->hasFnAttribute(Attribute::Naked))
return nullptr;
// Make sure that it is local to this module.
@@ -833,7 +825,6 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
- SmallPtrSet<Argument *, 8> ByValArgsToTransform;
for (Argument *PtrArg : PointerArgs) {
// Replace sret attribute with noalias. This reduces register pressure by
// avoiding a register copy.
@@ -850,6 +841,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If we can promote the pointer to its value.
SmallVector<OffsetAndArgPart, 4> ArgParts;
+
if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
SmallVector<Type *, 4> Types;
for (const auto &Pair : ArgParts)
@@ -857,56 +849,15 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (areTypesABICompatible(Types, *F, TTI)) {
ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
- continue;
}
}
-
- // Otherwise, if this is a byval argument, and if the aggregate type is
- // small, just pass the elements, which is always safe, if the passed value
- // is densely packed or if we can prove the padding bytes are never
- // accessed.
- //
- // Only handle arguments with specified alignment; if it's unspecified, the
- // actual alignment of the argument is target-specific.
- Type *ByValTy = PtrArg->getParamByValType();
- bool IsSafeToPromote =
- ByValTy && PtrArg->getParamAlign() &&
- (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) ||
- !canPaddingBeAccessed(PtrArg));
- if (!IsSafeToPromote) {
- LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
- << " the argument '" << PtrArg->getName()
- << "' because it is not safe.\n");
- continue;
- }
- if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
- << " the argument '" << PtrArg->getName()
- << "' because it would require adding more"
- << " than " << MaxElements
- << " arguments to the function.\n");
- continue;
- }
- SmallVector<Type *, 4> Types;
- append_range(Types, STy->elements());
-
- // If all the elements are single-value types, we can promote it.
- bool AllSimple =
- all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
-
- // Safe to transform. Passing the elements as a scalar will allow sroa to
- // hack on the new alloca we introduce.
- if (AllSimple && areTypesABICompatible(Types, *F, TTI))
- ByValArgsToTransform.insert(PtrArg);
- }
}
// No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ if (ArgsToPromote.empty())
return nullptr;
- return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+ return doPromotion(F, DTGetter, ACGetter, ArgsToPromote, ReplaceCallSite);
}
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
@@ -933,9 +884,19 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
return FAM.getResult<AAManager>(F);
};
- const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
- Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None,
- TTI, IsRecursive);
+ auto DTGetter = [&](Function &F) -> DominatorTree & {
+ assert(&F != &OldF && "Called with the obsolete function!");
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+
+ auto ACGetter = [&](Function &F) -> AssumptionCache * {
+ assert(&F != &OldF && "Called with the obsolete function!");
+ return &FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ const auto &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+ Function *NewF = promoteArguments(&OldF, AARGetter, DTGetter, ACGetter,
+ MaxElements, None, TTI, IsRecursive);
if (!NewF)
continue;
LocalChange = true;
diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
index 3365199d95535..9e076fb301a97 100644
--- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
@@ -3,25 +3,14 @@
%struct.ss = type { i32, i64 }
-; Don't drop 'byval' on %X here.
define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b, i32* byval(i32) align 4 %X, i32 %i) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[X:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
-; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4
-; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
-; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4
-; CHECK-NEXT: store i32 0, i32* [[X]], align 4
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
; CHECK-NEXT: ret void
;
entry:
-
%temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%temp1 = load i32, i32* %temp, align 4
%temp2 = add i32 %temp1, 1
@@ -41,11 +30,10 @@ define i32 @test(i32* %X) {
; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8
; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4
-; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
-; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
-; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]], i32 zeroext 0)
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i32 [[X_VAL]], i32 zeroext 0)
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
index 42b7d6d31905d..199f089932317 100644
--- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -2,24 +2,14 @@
; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
; Arg promotion eliminates the struct argument.
-; FIXME: We should eliminate the i32* argument.
%struct.ss = type { i32, i64 }
define internal void @f(%struct.ss* byval(%struct.ss) align 8 %b, i32* byval(i32) align 4 %X) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 8
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
-; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4
-; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
-; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4
-; CHECK-NEXT: store i32 0, i32* [[X]], align 4
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
; CHECK-NEXT: ret void
;
entry:
@@ -41,11 +31,10 @@ define i32 @test(i32* %X) {
; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8
; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4
-; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 8
-; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
-; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]])
+; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i32 [[X_VAL]])
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll b/llvm/test/Transforms/ArgumentPromotion/byval-with-padding.ll
similarity index 100%
rename from llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll
rename to llvm/test/Transforms/ArgumentPromotion/byval-with-padding.ll
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll
index 2416345400c3d..0b02e8b129b94 100644
--- a/llvm/test/Transforms/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll
@@ -7,17 +7,9 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
-; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4
-; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
-; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
; CHECK-NEXT: ret void
;
entry:
@@ -28,20 +20,11 @@ entry:
ret void
}
-
define internal void @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind {
; CHECK-LABEL: define {{[^@]+}}@g
-; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 32
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
-; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4
-; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
-; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
; CHECK-NEXT: ret void
;
entry:
@@ -75,6 +58,63 @@ entry:
ret void
}
+; Transform even if an argument is written to and then loaded from.
+define internal void @k(%struct.ss* byval(%struct.ss) align 4 %b) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@k
+; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+ %temp1 = load i32, i32* %temp, align 4
+ %temp2 = add i32 %temp1, 1
+ store i32 %temp2, i32* %temp, align 4
+ %temp3 = load i32, i32* %temp, align 4
+ ret void
+}
+
+; Transform even if a store instruction is the single user.
+define internal void @l(%struct.ss* byval(%struct.ss) align 4 %b) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@l
+; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+;
+entry:
+ %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+ store i32 1, i32* %temp, align 4
+ ret void
+}
+
+; Transform all the arguments, creating the required number of 'alloca's,
+; and then optimize them out.
+define internal void @m(%struct.ss* byval(%struct.ss) align 4 %b, %struct.ss* byval(%struct.ss) align 4 %c) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@m
+; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[C_0:%.*]], i64 [[C_1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[B_0]], 1
+; CHECK-NEXT: [[TEMP6:%.*]] = add i64 [[C_1]], 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
+ %temp1 = load i32, i32* %temp, align 4
+ %temp2 = add i32 %temp1, 1
+ store i32 %temp2, i32* %temp, align 4
+
+ %temp3 = getelementptr %struct.ss, %struct.ss* %c, i32 0, i32 0
+ store i32 %temp2, i32* %temp3, align 4
+
+ %temp4 = getelementptr %struct.ss, %struct.ss* %c, i32 0, i32 1
+ %temp5 = load i64, i64* %temp4, align 8
+ %temp6 = add i64 %temp5, 1
+ store i64 %temp6, i64* %temp4, align 8
+
+ ret void
+}
+
define i32 @main() nounwind {
; CHECK-LABEL: define {{[^@]+}}@main
; CHECK-SAME: () #[[ATTR0]] {
@@ -84,17 +124,26 @@ define i32 @main() nounwind {
; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8
; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4
-; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
-; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
-; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
-; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 32
-; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
-; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
+; CHECK-NEXT: [[S_0_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_0_0_0_VAL:%.*]] = load i32, i32* [[S_0_0_0]], align 4
+; CHECK-NEXT: call void @f(i32 [[S_0_0_0_VAL]])
+; CHECK-NEXT: [[S_1_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_1_0_0_VAL:%.*]] = load i32, i32* [[S_1_0_0]], align 4
+; CHECK-NEXT: call void @g(i32 [[S_1_0_0_VAL]])
; CHECK-NEXT: call void @h(%struct.ss* byval([[STRUCT_SS]]) [[S]])
+; CHECK-NEXT: [[S_2_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_2_0_0_VAL:%.*]] = load i32, i32* [[S_2_0_0]], align 4
+; CHECK-NEXT: call void @k(i32 [[S_2_0_0_VAL]])
+; CHECK-NEXT: [[S_3_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_3_0_0_VAL:%.*]] = load i32, i32* [[S_3_0_0]], align 4
+; CHECK-NEXT: call void @l(i32 [[S_3_0_0_VAL]])
+; CHECK-NEXT: [[S_4_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_4_0_0_VAL:%.*]] = load i32, i32* [[S_4_0_0]], align 4
+; CHECK-NEXT: [[S_4_1_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
+; CHECK-NEXT: [[S_4_1_0_VAL:%.*]] = load i32, i32* [[S_4_1_0]], align 4
+; CHECK-NEXT: [[S_4_1_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1
+; CHECK-NEXT: [[S_4_1_1_VAL:%.*]] = load i64, i64* [[S_4_1_1]], align 8
+; CHECK-NEXT: call void @m(i32 [[S_4_0_0_VAL]], i32 [[S_4_1_0_VAL]], i64 [[S_4_1_1_VAL]])
; CHECK-NEXT: ret i32 0
;
entry:
@@ -106,7 +155,8 @@ entry:
call void @f(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
call void @g(%struct.ss* byval(%struct.ss) align 32 %S) nounwind
call void @h(%struct.ss* byval(%struct.ss) %S) nounwind
+ call void @k(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
+ call void @l(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
+ call void @m(%struct.ss* byval(%struct.ss) align 4 %S, %struct.ss* byval(%struct.ss) align 4 %S) nounwind
ret i32 0
}
-
-
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
index 3df10ab90bdff..a9d89d5864242 100644
--- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
@@ -17,22 +17,20 @@ define internal void @test(i32** %X) !dbg !2 {
%struct.pair = type { i32, i32 }
+; Do not promote because there is a store of the pointer %P itself. Even if
+; %P had been promoted as a byval argument, the result would not have been
+; optimizable by SROA.
define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
; CHECK-LABEL: define {{[^@]+}}@test_byval
-; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
-; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1
-; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-SAME: ([[STRUCT_PAIR:%.*]]* byval([[STRUCT_PAIR]]) align 4 [[P:%.*]]) {
; CHECK-NEXT: [[SINK:%.*]] = alloca i32*, align 8
-; CHECK-NEXT: [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
-; CHECK-NEXT: store i32* [[DOT2]], i32** [[SINK]], align 8
+; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
+; CHECK-NEXT: store i32* [[TEMP]], i32** [[SINK]], align 8
; CHECK-NEXT: ret void
;
%1 = alloca i32*, align 8
%2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
- store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion
+ store i32* %2, i32** %1, align 8 ; to protect from promotion
ret void
}
@@ -42,11 +40,7 @@ define void @caller(i32** %Y, %struct.pair* %P) {
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg [[DBG4:![0-9]+]]
; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg [[DBG4]]
; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg [[DBG4]]
-; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg [[DBG5:![0-9]+]]
-; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg [[DBG5]]
-; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg [[DBG5]]
-; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg [[DBG5]]
-; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg [[DBG5]]
+; CHECK-NEXT: call void @test_byval([[STRUCT_PAIR]]* byval([[STRUCT_PAIR]]) align 4 [[P]]), !dbg [[DBG5:![0-9]+]]
; CHECK-NEXT: ret void
;
call void @test(i32** %Y), !dbg !1
diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
index a0143d31cd934..90b6998495964 100644
--- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
@@ -14,23 +14,23 @@ target triple = "x86_64-unknown-linux-gnu"
define void @run() {
; CHECK-LABEL: define {{[^@]+}}@run() {
-; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %union.u* bitcast (%struct.s* @b to %union.u*) to i8*
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 10
; CHECK-NEXT: [[DOTVAL:%.*]] = load i8, i8* [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call i8 @UseLongDoubleUnsafely(i8 [[DOTVAL]])
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0
+; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i64 0, i32 0
; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]])
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.Foo* @a to i64*
-; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP4]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]])
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*))
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.Foo* @a to i64*
+; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP5]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a)
; CHECK-NEXT: ret void
;
-entry:
tail call i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*))
tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*))
+ tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*))
call i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) @a)
call i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) @a)
ret void
@@ -38,11 +38,9 @@ entry:
define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) {
; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely
-; CHECK-SAME: (i8 [[ARG_10_VAL:%.*]]) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: ret i8 [[ARG_10_VAL]]
+; CHECK-SAME: (i8 [[ARG_0_VAL:%.*]]) {
+; CHECK-NEXT: ret i8 [[ARG_0_VAL]]
;
-entry:
%bitcast = bitcast %union.u* %arg to %struct.s*
%gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2
%result = load i8, i8* %gep
@@ -51,23 +49,30 @@ entry:
define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 %arg) {
; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
-; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) {
-; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0
-; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16
+; CHECK-SAME: (x86_fp80 [[ARG_0_VAL:%.*]]) {
+; CHECK-NEXT: ret x86_fp80 [[ARG_0_VAL]]
+;
+ %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
+ %fp80 = load x86_fp80, x86_fp80* %gep
+ ret x86_fp80 %fp80
+}
+
+define internal x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 %arg) {
+; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafelyNoPromotion
+; CHECK-SAME: ([[UNION_U]]* byval([[UNION_U]]) align 16 [[ARG:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0
-; CHECK-NEXT: [[IDX_P:%.*]] = alloca i64, align 8
-; CHECK-NEXT: store i64 0, i64* [[IDX_P]], align 8
-; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8
+; CHECK-NEXT: [[TMP_IDX:%.*]] = alloca i64, align 8
+; CHECK-NEXT: store i64 0, i64* [[TMP_IDX]], align 8
+; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[TMP_IDX]], align 8
; CHECK-NEXT: [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0
-; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16
+; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]]
; CHECK-NEXT: ret x86_fp80 [[FP80]]
;
%gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
%idx_slot = alloca i64, align 8
store i64 0, i64* %idx_slot, align 8
%idx = load i64, i64* %idx_slot, align 8
- %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion
+ %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from promotion
%fp80 = load x86_fp80, x86_fp80* %gep
ret x86_fp80 %fp80
}
diff --git a/llvm/test/Transforms/ArgumentPromotion/metadata.ll b/llvm/test/Transforms/ArgumentPromotion/metadata.ll
index 3549bcb8f32df..91ec033116012 100644
--- a/llvm/test/Transforms/ArgumentPromotion/metadata.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/metadata.ll
@@ -7,6 +7,8 @@ declare void @use.p32(i32*)
define internal void @callee(i32* %p1, i32** %p2, i32** %p3, i32** %p4, i32** %p5, i32** %p6) {
; CHECK-LABEL: define {{[^@]+}}@callee
; CHECK-SAME: (i32 [[P1_0_VAL:%.*]], i32* [[P2_0_VAL:%.*]], i32* [[P3_0_VAL:%.*]], i32* [[P4_0_VAL:%.*]], i32* [[P5_0_VAL:%.*]], i32* [[P6_0_VAL:%.*]]) {
+; CHECK-NEXT: [[IS_NOT_NULL:%.*]] = icmp ne i32* [[P2_0_VAL]], null
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_NULL]])
; CHECK-NEXT: call void @use.i32(i32 [[P1_0_VAL]])
; CHECK-NEXT: call void @use.p32(i32* [[P2_0_VAL]])
; CHECK-NEXT: call void @use.p32(i32* [[P3_0_VAL]])
@@ -51,6 +53,8 @@ define internal i32* @callee_conditional(i1 %c, i32** dereferenceable(8) align 8
; CHECK-SAME: (i1 [[C:%.*]], i32* [[P_0_VAL:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK: if:
+; CHECK-NEXT: [[IS_NOT_NULL:%.*]] = icmp ne i32* [[P_0_VAL]], null
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_NULL]])
; CHECK-NEXT: ret i32* [[P_0_VAL]]
; CHECK: else:
; CHECK-NEXT: ret i32* null
diff --git a/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll b/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll
new file mode 100644
index 0000000000000..117c2c3a5b49c
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+; Store instructions are allowed as users only for byval arguments.
+define internal void @callee(i32* %arg) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i32* [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEMP:%.*]] = load i32, i32* [[ARG]], align 4
+; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TEMP]], 1
+; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %temp = load i32, i32* %arg, align 4
+ %sum = add i32 %temp, 1
+ store i32 %sum, i32* %arg, align 4
+ ret void
+}
+
+define i32 @caller(i32* %arg) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i32* [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @callee(i32* [[ARG]]) #[[ATTR0]]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ call void @callee(i32* %arg) nounwind
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
new file mode 100644
index 0000000000000..7d7099003dc77
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+%struct.ss = type { i32, i64 }
+
+define internal void @f(ptr byval(ptr) align 4 %p) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store ptr [[P]], ptr [[P]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store ptr %p, ptr %p
+ ret void
+}
+
+define internal void @g(ptr byval(ptr) align 4 %p) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; CHECK-NEXT: store ptr [[P]], ptr [[P1]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %p1 = getelementptr i8, ptr %p, i64 4
+ store ptr %p, ptr %p1
+ ret void
+}
+
+define internal void @h(ptr byval(ptr) align 4 %p) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@h
+; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; CHECK-NEXT: store ptr [[P1]], ptr [[P]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %p1 = getelementptr i8, ptr %p, i64 4
+ store ptr %p1, ptr %p
+ ret void
+}
+
+define internal void @k(ptr byval(ptr) align 4 %p) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@k
+; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load ptr, ptr [[P]]
+; CHECK-NEXT: store ptr [[P]], ptr [[X]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %x = load ptr, ptr %p
+ store ptr %p, ptr %x
+ ret void
+}
+
+define internal void @l(ptr byval(ptr) align 4 %p) nounwind {
+; CHECK-LABEL: define {{[^@]+}}@l
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+;
+entry:
+ %x = load ptr, ptr %p
+ store ptr %x, ptr %p
+ ret void
+}
+
+define i32 @main() nounwind {
+; CHECK-LABEL: define {{[^@]+}}@main
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
+; CHECK-NEXT: [[TEMP1:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 0
+; CHECK-NEXT: store i32 1, ptr [[TEMP1]], align 4
+; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1
+; CHECK-NEXT: store i64 2, ptr [[TEMP4]], align 8
+; CHECK-NEXT: call void @f(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
+; CHECK-NEXT: call void @g(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
+; CHECK-NEXT: call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
+; CHECK-NEXT: call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
+; CHECK-NEXT: [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8
+; CHECK-NEXT: call void @l() #[[ATTR0]]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %S = alloca %struct.ss, align 32
+ %temp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
+ store i32 1, i32* %temp1, align 4
+ %temp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
+ store i64 2, i64* %temp4, align 8
+ call void @f(ptr byval(ptr) align 4 %S) nounwind
+ call void @g(ptr byval(ptr) align 4 %S) nounwind
+ call void @h(ptr byval(ptr) align 4 %S) nounwind
+ call void @k(ptr byval(ptr) align 4 %S) nounwind
+ call void @l(ptr byval(ptr) align 4 %S) nounwind
+ ret i32 0
+}