[llvm] [LICM] Promote conditional, loop-invariant memory accesses to scalars with intrinsic (PR #93999)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 11:04:20 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-llvm-transforms
Author: Ivan Shumakov (ii-sc)
Changes:
There is a missed opportunity to promote conditional stores in the LICM pass. It was implemented in this old patch:
https://reviews.llvm.org/D115244
That patch has a minor flaw: on some architectures the masked store intrinsic is lowered to vector instructions. For example, on RISC-V this code
```cpp
int res;
void test(int * restrict a, int N) {
  for (int i = 0; i < N; ++i)
    if (a[i])
      ++res;
}
```
is lowered to assembly that uses vector instructions:
```asm
.LBB0_2: # %for.cond.cleanup
    andi a4, a4, 1
    vsetivli zero, 1, e32, mf2, ta, ma
    vmv.v.x v9, a4
    vmv.v.x v8, a3
    vsetvli zero, zero, e8, mf8, ta, ma
    vmsne.vi v0, v9, 0
    vse32.v v8, (a2), v0.t
```
which is unnecessary here.
For this patch, I have implemented a dedicated `conditional_store` intrinsic in addition to the original change.
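To make the intended transformation concrete, here is a minimal sketch of the IR that LICM could produce for the C example above once the conditional store is promoted. The mangled intrinsic name (`llvm.conditional.store.i32.p0`), the value names, and the exact block structure are illustrative assumptions for this sketch, not verbatim output of the patch; the module would verify only with the patch applied, since the intrinsic does not exist upstream.

```llvm
; Hypothetical post-LICM IR for the C example above. @res is kept in a
; register inside the loop; an i1 flag records whether any guarded store
; would have executed, and a single conditional store is sunk into the
; exit block.
@res = global i32 0

declare void @llvm.conditional.store.i32.p0(i32, ptr, i32 immarg, i1)

define void @test(ptr noalias %a, i32 %N) {
entry:
  %res.init = load i32, ptr @res, align 4   ; hoisted load of the global
  %precond = icmp sgt i32 %N, 0
  br i1 %precond, label %loop, label %exit

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
  %res.cur = phi i32 [ %res.init, %entry ], [ %res.next, %latch ]
  %flag = phi i1 [ false, %entry ], [ %flag.next, %latch ]
  %gep = getelementptr inbounds i32, ptr %a, i32 %i
  %v = load i32, ptr %gep, align 4
  %nz = icmp ne i32 %v, 0
  br i1 %nz, label %then, label %latch

then:                                        ; the former conditional store
  %inc = add nsw i32 %res.cur, 1             ; now only updates registers
  br label %latch

latch:
  %res.next = phi i32 [ %inc, %then ], [ %res.cur, %loop ]
  %flag.next = phi i1 [ true, %then ], [ %flag, %loop ]  ; raise the flag
  %i.next = add nsw i32 %i, 1
  %cmp = icmp slt i32 %i.next, %N
  br i1 %cmp, label %loop, label %exit

exit:
  %res.lcssa = phi i32 [ %res.init, %entry ], [ %res.next, %latch ]
  %flag.lcssa = phi i1 [ false, %entry ], [ %flag.next, %latch ]
  ; store back only if some iteration actually stored
  call void @llvm.conditional.store.i32.p0(i32 %res.lcssa, ptr @res,
                                           i32 4, i1 %flag.lcssa)
  ret void
}
```

The loop body no longer touches `@res` in memory; the only remaining memory access is the guarded store in the exit block.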
---
Patch is 32.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93999.diff
20 Files Affected:
- (modified) llvm/include/llvm/IR/IRBuilder.h (+4)
- (modified) llvm/include/llvm/IR/Intrinsics.td (+15)
- (modified) llvm/include/llvm/InitializePasses.h (+1)
- (modified) llvm/include/llvm/LinkAllPasses.h (+1)
- (modified) llvm/include/llvm/Transforms/Scalar.h (+5)
- (added) llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h (+30)
- (modified) llvm/lib/IR/IRBuilder.cpp (+16)
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1)
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7-3)
- (modified) llvm/lib/Passes/PassRegistry.def (+1)
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-1)
- (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+3)
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1)
- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+66-3)
- (added) llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp (+115)
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1)
- (added) llvm/test/Transforms/LICM/conditional-store-intrinsic.ll (+22)
- (added) llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll (+76)
- (added) llvm/test/Transforms/LICM/promote-conditional-store-intr.ll (+69)
``````````diff
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 40a9cf507248a..06fc284c7a824 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -818,6 +818,10 @@ class IRBuilderBase {
/// Create a call to llvm.threadlocal.address intrinsic.
CallInst *CreateThreadLocalAddress(Value *Ptr);
+  /// Create a call to a Conditional Store intrinsic.
+  CallInst *CreateConditionalStore(Value *Val, Value *Ptr, Align Alignment,
+                                   Value *Condition);
+
/// Create a call to Masked Load intrinsic
CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
Value *PassThru = nullptr, const Twine &Name = "");
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..95a9f6cc04de2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2322,6 +2322,21 @@ def int_vp_is_fpclass:
llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
+//===-------------------------- Conditional Intrinsics --------------------===//
+//
+
+def int_conditional_store:
+    DefaultAttrsIntrinsic</*ret_types*/[],
+                          /*param_types*/[/*Val*/llvm_any_ty,
+                                          /*Ptr*/llvm_anyptr_ty,
+                                          /*Alignment*/llvm_i32_ty,
+                                          /*Condition*/llvm_i1_ty],
+                          /*intr_properties*/[IntrWriteMem,
+                                              IntrArgMemOnly,
+                                              IntrWillReturn,
+                                              /*Alignment is a constant*/ImmArg<ArgIndex<2>>,
+                                              NoCapture<ArgIndex<1>>]>;
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_load:
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9ba75d491c1c9..c36f035e00bdc 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -169,6 +169,7 @@ void initializeLoopUnrollPass(PassRegistry&);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
+void initializeLowerConditionalStoreIntrinsicLegacyPass(PassRegistry &);
void initializeLowerGlobalDtorsLegacyPassPass(PassRegistry &);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 30e7c22f31460..042a0e8768380 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -90,6 +90,7 @@ namespace {
(void) llvm::createLoopStrengthReducePass();
(void) llvm::createLoopUnrollPass();
(void) llvm::createLowerConstantIntrinsicsPass();
+ (void)llvm::createLowerConditionalStoreIntrinsicPass();
(void) llvm::createLowerGlobalDtorsLegacyPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index f74a49785e11b..962f5c75a01e6 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -143,6 +143,11 @@ Pass *createMergeICmpsLegacyPass();
FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
extern char &InferAddressSpacesID;
+//===----------------------------------------------------------------------===//
+//
+// Lower conditional store intrinsic
+FunctionPass *createLowerConditionalStoreIntrinsicPass();
+
//===----------------------------------------------------------------------===//
//
// TLSVariableHoist - This pass reduce duplicated TLS address call.
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
new file mode 100644
index 0000000000000..f3b6f6ce2a185
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
@@ -0,0 +1,30 @@
+//===- LowerConditionalStoreIntrinsic.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass for early lowering of the conditional store intrinsic.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONDSTOREINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERCONDSTOREINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct LowerConditionalStoreIntrinsicPass
+ : PassInfoMixin<LowerConditionalStoreIntrinsicPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b32799355d692..4a0ba04a86e24 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -566,6 +566,22 @@ Instruction *IRBuilderBase::CreateNoAliasScopeDeclaration(Value *Scope) {
return CreateCall(FnIntrinsic, {Scope});
}
+/// Create a call to a Conditional Store intrinsic.
+/// \p Val - data to be stored
+/// \p Ptr - base pointer for the store
+/// \p Alignment - alignment of the destination location
+/// \p Condition - boolean indicating whether the store should be performed
+CallInst *IRBuilderBase::CreateConditionalStore(Value *Val, Value *Ptr,
+                                                Align Alignment,
+                                                Value *Condition) {
+  auto *PtrTy = cast<PointerType>(Ptr->getType());
+  Type *DataTy = Val->getType();
+  Type *OverloadedTypes[] = {DataTy, PtrTy};
+  Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Condition};
+  return CreateMaskedIntrinsic(Intrinsic::conditional_store, Ops,
+                               OverloadedTypes);
+}
+
/// Create a call to a Masked Load intrinsic.
/// \p Ty - vector type to load
/// \p Ptr - base pointer for the load
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..fe6b75c3b8edd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -232,6 +232,7 @@
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1892e16a06528..382c57c0375e0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
@@ -499,7 +500,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -691,7 +692,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -744,7 +745,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
FPM.addPass(CoroElidePass());
invokeScalarOptimizerLateEPCallbacks(FPM, Level);
@@ -1279,6 +1280,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
ExtraPasses.addPass(InstCombinePass());
FPM.addPass(std::move(ExtraPasses));
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
}
// Now that we've formed fast to execute loop structures, we do further
@@ -1354,6 +1356,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1950,6 +1953,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ MainFPM.addPass(LowerConditionalStoreIntrinsicPass());
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..04d01f7d7310e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -385,6 +385,7 @@ FUNCTION_PASS("lower-allow-check", LowerAllowCheckPass())
FUNCTION_PASS("lower-atomic", LowerAtomicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-conditional-store", LowerConditionalStoreIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-invoke", LowerInvokePass())
FUNCTION_PASS("lower-switch", LowerSwitchPass())
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 945ab5cf1f303..db8eb7a952335 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -619,6 +619,9 @@ void AArch64PassConfig::addIRPasses() {
// Do loop invariant code motion in case part of the lowered result is
// invariant.
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target.
+ addPass(createLowerConditionalStoreIntrinsicPass());
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbbfe34a63863..a865c81d430e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1046,8 +1046,12 @@ void AMDGPUPassConfig::addIRPasses() {
// Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
// have expanded.
- if (TM.getOptLevel() > CodeGenOptLevel::Less)
+ if (TM.getOptLevel() > CodeGenOptLevel::Less) {
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target.
+ addPass(createLowerConditionalStoreIntrinsicPass());
+ }
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 714cf69827a1e..3650f4c2121e6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -485,6 +485,9 @@ void PPCPassConfig::addIRPasses() {
// Do loop invariant code motion in case part of the lowered result is
// invariant.
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target.
+ addPass(createLowerConditionalStoreIntrinsicPass());
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4..bdecc839729c9 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_component_library(LLVMScalarOpts
LoopUnrollAndJamPass.cpp
LoopVersioningLICM.cpp
LowerAtomicPass.cpp
+ LowerConditionalStoreIntrinsic.cpp
LowerConstantIntrinsics.cpp
LowerExpectIntrinsic.cpp
LowerGuardIntrinsic.cpp
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 5eccf7b4adb65..009b5f3bb7bcc 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -157,6 +157,11 @@ cl::opt<unsigned> llvm::SetLicmMssaOptCap(
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
+cl::opt<bool> SetLicmConditionalAccessPromotion(
+    "licm-conditional-access-promotion", cl::Hidden, cl::init(true),
+    cl::desc("Enable promotion of conditional accesses to loop-invariant"
+             " locations"));
+
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
@@ -1819,6 +1824,12 @@ class LoopPromoter : public LoadAndStorePromoter {
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
bool CanInsertStoresInExitBlocks;
+  // This flag will be used to make sure that every sunk conditional store
+  // instruction is executed conditionally within the exit blocks. In the
+  // preheader, it is initialized to 0. In every basic block containing a
+  // conditional store it is raised.
+ bool ConditionalAccessShouldBePromoted;
+ SSAUpdater &FlagSSAUpdater;
ArrayRef<const Instruction *> Uses;
// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
@@ -1839,6 +1850,17 @@ class LoopPromoter : public LoadAndStorePromoter {
return PN;
}
+  void promoteConditionalAccess(BasicBlock *ExitBlock, Value *LiveInValue,
+                                Value *PtrToExitBB,
+                                BasicBlock::iterator InsertPos) {
+    Value *FlagValue = FlagSSAUpdater.GetValueInMiddleOfBlock(ExitBlock);
+    IRBuilder<> Builder(&*InsertPos);
+    Type *DataType = LiveInValue->getType();
+    Value *Ptr = Builder.CreatePointerCast(PtrToExitBB,
+                                           PointerType::getUnqual(DataType));
+    Builder.CreateConditionalStore(LiveInValue, Ptr, Alignment, FlagValue);
+  }
+
public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
SmallVectorImpl<BasicBlock *> &LEB,
@@ -1846,13 +1868,17 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
+ ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks,
+ bool ConditionalAccessShouldBePromoted,
+ SSAUpdater &FlagSSAUpdater)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
LI(li), DL(std::move(dl)), Alignment(Alignment),
UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo),
- CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
+ CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks),
+ ConditionalAccessShouldBePromoted(ConditionalAccessShouldBePromoted),
+ FlagSSAUpdater(FlagSSAUpdater), Uses(Insts) {}
void insertStoresInLoopExitBlocks() {
// Insert stores after in the loop exit blocks. Each exit block gets a
@@ -1866,6 +1892,10 @@ class LoopPromoter : public LoadAndStorePromoter {
LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
BasicBlock::iterator InsertPos = LoopInsertPts[i];
+ if (ConditionalAccessShouldBePromoted) {
+ promoteConditionalAccess(ExitBlock, LiveInValue, Ptr, InsertPos);
+ continue;
+ }
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
@@ -2042,6 +2072,9 @@ bool llvm::promoteLoopAccessesToScalars(
bool SawNotAtomic = false;
AAMDNodes AATags;
+ bool SawConditionalLIStore = false;
+ StringRef PointerOperandName;
+
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
// If there are reads outside the promoted set, then promoting stores is
@@ -2119,6 +2152,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (StoreSafety == StoreSafetyUnknown)
StoreSafety = StoreSafe;
Alignment = std::max(Alignment, InstAlignment);
+ } else if (SetLicmConditionalAccessPromotion &&
+ (!SawConditionalLIStore || (InstAlignment > Alignment))) {
+ SawConditionalLIStore = true;
+ if (PointerOperandName.empty())
+ PointerOperandName = Store->getPointerOperand()->getName();
+ Alignment = std::max(Alignment, InstAlignment);
}
// If a store dominates all exit blocks, it is safe to sink.
@@ -2199,6 +2238,29 @@ bool llvm::promoteLoopAccessesToScalars(
// If we cannot hoist the load either, give up.
return false;
+ const bool PromoteConditionalAccesses =
+ SetLicmConditionalAccessPromotion && SawConditionalLIStore;
+ bool ConditionalAccessShouldBePromoted = false;
+ SmallVector<PHINode *, 16> FlagPHIs;
+ SSAUpdater FlagSSAUpdater(&FlagPHIs);
+ if (StoreSafety == StoreSafetyUnknown && PromoteConditionalAccesses) {
+ ConditionalAccessShouldBePromoted = true;
+ // If we are allowed to promote conditional stores, store promotion is safe
+ StoreSafety = StoreSafe;
+ Type *Int1Ty = Type::getInt1Ty(Preheader->getParent()->getContext());
+ FlagSSAUpdater.Initialize(Int1Ty, PointerOperandName.str() + ".flag");
+ // Initialize the flag with 0 in the preheader.
+ FlagSSAUpdater.AddAvailableValue(Preheader,
+ ConstantInt::get(Int1Ty,
+ /* Value */ 0));
+ for (auto *UI : LoopUses)
+ if (StoreInst *ConditionalLIStore = dyn_cast<StoreInst>(UI))
+ // Raise the flag if a conditional store happened.
+ FlagSSAUpdater.AddAvailableValue(ConditionalLIStore->getParent(),
+ ConstantInt::get(Int1Ty,
+ /* Value */ 1));
+ }
+
// Lets do the promotion!
if (StoreSafety == StoreSafe) {
LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
@@ -2228,7 +2290,8 @@ bool llvm::promoteLoopAccessesToScalars(
LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
MSSAInsertPts, PIC, MSSAU, *LI,...
[truncated]
``````````
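For completeness, here is a sketch of the expansion that the new `lower-conditional-store` pass is expected to perform on targets without native support, under the same naming assumptions as the sketch above: the intrinsic call simply becomes a branch around an ordinary store.

```llvm
@res = global i32 0

; Conceptual lowering of
;   call void @llvm.conditional.store.i32.p0(i32 %val, ptr @res, i32 4, i1 %flag)
; into control flow plus a plain store.
define void @store_if(i32 %val, i1 %flag) {
entry:
  br i1 %flag, label %do.store, label %done

do.store:
  store i32 %val, ptr @res, align 4
  br label %done

done:
  ret void
}
```

On RISC-V this should compile to a conditional branch and a plain `sw`, avoiding the `vsetivli`/`vse32.v` sequence shown earlier. To experiment, an invocation along the lines of `opt -passes='function(loop-mssa(licm),lower-conditional-store)' -licm-conditional-access-promotion test.ll` should exercise both halves (pass names taken from the `PassRegistry.def` hunk above).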
https://github.com/llvm/llvm-project/pull/93999