[llvm] [LICM] Promote conditional, loop-invariant memory accesses to scalars with intrinsic (PR #93999)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 31 11:04:20 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-transforms

Author: Ivan Shumakov (ii-sc)

<details>
<summary>Changes</summary>

The LICM pass misses an opportunity to promote conditional stores. This promotion was implemented in an old patch:

https://reviews.llvm.org/D115244

That patch has a minor flaw: on some architectures the masked store intrinsic it relies on lowers to vector instructions. For example, on RISC-V this code
```cpp
int res;

void test(int * restrict a, int N) {
    for (int i = 0; i < N; ++i)
        if (a[i])
            ++res;
}
```
compiles to assembly that uses vector instructions:
```asm
.LBB0_2:                                # %for.cond.cleanup
	andi	a4, a4, 1
	vsetivli	zero, 1, e32, mf2, ta, ma
	vmv.v.x	v9, a4
	vmv.v.x	v8, a3
	vsetvli	zero, zero, e8, mf8, ta, ma
	vmsne.vi	v0, v9, 0
	vse32.v	v8, (a2), v0.t
```
which are unnecessary here.
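Concretely, the old patch promotes the conditional store by tracking a "store happened" flag and emitting `llvm.masked.store` over single-element vectors in the loop exit block, which is where the vector code above comes from. A hand-written sketch of that exit-block IR, not actual output of D115244 (names are illustrative):
```llvm
@res = global i32 0

; Exit block after promotion via llvm.masked.store: the promoted scalar
; and the "did a store happen" flag must be wrapped into 1-element vectors.
define void @exit_sketch(i32 %res.promoted, i1 %flag) {
  %val  = insertelement <1 x i32> poison, i32 %res.promoted, i64 0
  %mask = insertelement <1 x i1> poison, i1 %flag, i64 0
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr @res, i32 4, <1 x i1> %mask)
  ret void
}

declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)
```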

On top of the original patch, I have implemented a dedicated `conditional_store` intrinsic.
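With the new intrinsic, the promoted value, pointer, and guard all stay scalar. Based on the `int_conditional_store` definition and `CreateConditionalStore` in the diff below, a call would look roughly like this (a sketch; the `.i32.p0` suffix follows the two overloaded types):
```llvm
@res = global i32 0

; Same exit block with the new intrinsic: no single-element vectors needed.
define void @exit_sketch(i32 %res.promoted, i1 %flag) {
  call void @llvm.conditional.store.i32.p0(i32 %res.promoted, ptr @res, i32 4, i1 %flag)
  ret void
}

declare void @llvm.conditional.store.i32.p0(i32, ptr, i32, i1)
```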

---

Patch is 32.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93999.diff


20 Files Affected:

- (modified) llvm/include/llvm/IR/IRBuilder.h (+4) 
- (modified) llvm/include/llvm/IR/Intrinsics.td (+15) 
- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (modified) llvm/include/llvm/LinkAllPasses.h (+1) 
- (modified) llvm/include/llvm/Transforms/Scalar.h (+5) 
- (added) llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h (+30) 
- (modified) llvm/lib/IR/IRBuilder.cpp (+16) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7-3) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-1) 
- (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+3) 
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1) 
- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+66-3) 
- (added) llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp (+115) 
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1) 
- (added) llvm/test/Transforms/LICM/conditional-store-intrinsic.ll (+22) 
- (added) llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll (+76) 
- (added) llvm/test/Transforms/LICM/promote-conditional-store-intr.ll (+69) 


``````````diff
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 40a9cf507248a..06fc284c7a824 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -818,6 +818,10 @@ class IRBuilderBase {
   /// Create a call to llvm.threadlocal.address intrinsic.
   CallInst *CreateThreadLocalAddress(Value *Ptr);
 
+  /// Create a call to a Conditional Store intrinsic.
+  CallInst *CreateConditionalStore(Value *Val, Value *Ptr, Align Alignment,
+                                   Value *Condition);
+
   /// Create a call to Masked Load intrinsic
   CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
                              Value *PassThru = nullptr, const Twine &Name = "");
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..95a9f6cc04de2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2322,6 +2322,21 @@ def int_vp_is_fpclass:
                                 llvm_i32_ty],
                               [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
 
+//===-------------------------- Conditional Intrinsics --------------------===//
+//
+
+def int_conditional_store:
+  DefaultAttrsIntrinsic</*ret_types*/[],
+                        /*param_types*/[/*Val*/llvm_any_ty,
+                                        /*Ptr*/llvm_anyptr_ty,
+                                        /*Alignment*/llvm_i32_ty,
+                                        /*Condition*/llvm_i1_ty],
+                        /*intr_properties*/[IntrWriteMem,
+                                            IntrArgMemOnly,
+                                            IntrWillReturn,
+                 /*Alignment is a constant*/ImmArg<ArgIndex<2>>,
+                                            NoCapture<ArgIndex<1>>]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_load:
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9ba75d491c1c9..c36f035e00bdc 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -169,6 +169,7 @@ void initializeLoopUnrollPass(PassRegistry&);
 void initializeLowerAtomicLegacyPassPass(PassRegistry&);
 void initializeLowerConstantIntrinsicsPass(PassRegistry&);
 void initializeLowerEmuTLSPass(PassRegistry&);
+void initializeLowerConditionalStoreIntrinsicLegacyPass(PassRegistry &);
 void initializeLowerGlobalDtorsLegacyPassPass(PassRegistry &);
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokeLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 30e7c22f31460..042a0e8768380 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -90,6 +90,7 @@ namespace {
       (void) llvm::createLoopStrengthReducePass();
       (void) llvm::createLoopUnrollPass();
       (void) llvm::createLowerConstantIntrinsicsPass();
+      (void)llvm::createLowerConditionalStoreIntrinsicPass();
       (void) llvm::createLowerGlobalDtorsLegacyPass();
       (void) llvm::createLowerInvokePass();
       (void) llvm::createLowerSwitchPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index f74a49785e11b..962f5c75a01e6 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -143,6 +143,11 @@ Pass *createMergeICmpsLegacyPass();
 FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
 extern char &InferAddressSpacesID;
 
+//===----------------------------------------------------------------------===//
+//
+//  Lower conditional store intrinsic
+FunctionPass *createLowerConditionalStoreIntrinsicPass();
+
 //===----------------------------------------------------------------------===//
 //
 // TLSVariableHoist - This pass reduce duplicated TLS address call.
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
new file mode 100644
index 0000000000000..f3b6f6ce2a185
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
@@ -0,0 +1,30 @@
+//===- LowerConditionalStoreIntrinsic.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+///  Pass for early lowering of conditional store.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONDSTOREINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERCONDSTOREINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct LowerConditionalStoreIntrinsicPass
+    : PassInfoMixin<LowerConditionalStoreIntrinsicPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b32799355d692..4a0ba04a86e24 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -566,6 +566,22 @@ Instruction *IRBuilderBase::CreateNoAliasScopeDeclaration(Value *Scope) {
   return CreateCall(FnIntrinsic, {Scope});
 }
 
+/// Create a call to a Conditional Store intrinsic.
+/// \p Val       - data to be stored,
+/// \p Ptr       - base pointer for the store
+/// \p Alignment - alignment of the destination location
+/// \p Condition - boolean that indicates if store should be performed
+CallInst *IRBuilderBase::CreateConditionalStore(Value *Val, Value *Ptr,
+                                                Align Alignment,
+                                                Value *Condition) {
+  auto *PtrTy = cast<PointerType>(Ptr->getType());
+  Type *DataTy = Val->getType();
+  Type *OverloadedTypes[] = {DataTy, PtrTy};
+  Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Condition};
+  return CreateMaskedIntrinsic(Intrinsic::conditional_store, Ops,
+                               OverloadedTypes);
+}
+
 /// Create a call to a Masked Load intrinsic.
 /// \p Ty        - vector type to load
 /// \p Ptr       - base pointer for the load
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..fe6b75c3b8edd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -232,6 +232,7 @@
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
 #include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1892e16a06528..382c57c0375e0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
@@ -499,7 +500,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                               /*UseMemorySSA=*/false,
                                               /*UseBlockFrequencyInfo=*/false));
-
+  FPM.addPass(LowerConditionalStoreIntrinsicPass());
   // Delete small array after loop unroll.
   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
 
@@ -691,7 +692,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                               /*UseMemorySSA=*/false,
                                               /*UseBlockFrequencyInfo=*/false));
-
+  FPM.addPass(LowerConditionalStoreIntrinsicPass());
   // Delete small array after loop unroll.
   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
 
@@ -744,7 +745,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
-
+  FPM.addPass(LowerConditionalStoreIntrinsicPass());
   FPM.addPass(CoroElidePass());
 
   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
@@ -1279,6 +1280,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     ExtraPasses.addPass(InstCombinePass());
     FPM.addPass(std::move(ExtraPasses));
+    FPM.addPass(LowerConditionalStoreIntrinsicPass());
   }
 
   // Now that we've formed fast to execute loop structures, we do further
@@ -1354,6 +1356,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+  FPM.addPass(LowerConditionalStoreIntrinsicPass());
 
   // Now that we've vectorized and unrolled loops, we may have more refined
   // alignment information, try to re-derive it here.
@@ -1950,6 +1953,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                /*AllowSpeculation=*/true),
       /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+  MainFPM.addPass(LowerConditionalStoreIntrinsicPass());
 
   if (RunNewGVN)
     MainFPM.addPass(NewGVNPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..04d01f7d7310e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -385,6 +385,7 @@ FUNCTION_PASS("lower-allow-check", LowerAllowCheckPass())
 FUNCTION_PASS("lower-atomic", LowerAtomicPass())
 FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
 FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-conditional-store", LowerConditionalStoreIntrinsicPass())
 FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
 FUNCTION_PASS("lower-invoke", LowerInvokePass())
 FUNCTION_PASS("lower-switch", LowerSwitchPass())
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 945ab5cf1f303..db8eb7a952335 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -619,6 +619,9 @@ void AArch64PassConfig::addIRPasses() {
     // Do loop invariant code motion in case part of the lowered result is
     // invariant.
     addPass(createLICMPass());
+    // This pass expands conditional store intrinsics,
+    // which are not supported by the target.
+    addPass(createLowerConditionalStoreIntrinsicPass());
   }
 
   TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbbfe34a63863..a865c81d430e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1046,8 +1046,12 @@ void AMDGPUPassConfig::addIRPasses() {
 
     // Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
     // have expanded.
-    if (TM.getOptLevel() > CodeGenOptLevel::Less)
+    if (TM.getOptLevel() > CodeGenOptLevel::Less) {
       addPass(createLICMPass());
+      // This pass expands conditional store intrinsics,
+      // which are not supported by the target.
+      addPass(createLowerConditionalStoreIntrinsicPass());
+    }
   }
 
   TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 714cf69827a1e..3650f4c2121e6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -485,6 +485,9 @@ void PPCPassConfig::addIRPasses() {
     // Do loop invariant code motion in case part of the lowered result is
     // invariant.
     addPass(createLICMPass());
+    // This pass expands conditional store intrinsics,
+    // which are not supported by the target.
+    addPass(createLowerConditionalStoreIntrinsicPass());
   }
 
   TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4..bdecc839729c9 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_component_library(LLVMScalarOpts
   LoopUnrollAndJamPass.cpp
   LoopVersioningLICM.cpp
   LowerAtomicPass.cpp
+  LowerConditionalStoreIntrinsic.cpp
   LowerConstantIntrinsics.cpp
   LowerExpectIntrinsic.cpp
   LowerGuardIntrinsic.cpp
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 5eccf7b4adb65..009b5f3bb7bcc 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -157,6 +157,11 @@ cl::opt<unsigned> llvm::SetLicmMssaOptCap(
     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
              "for faster compile. Caps the MemorySSA clobbering calls."));
 
+cl::opt<bool> SetLicmConditionalAccessPromotion(
+    "licm-conditional-access-promotion", cl::Hidden, cl::init(true),
+    cl::desc("Enable promotion of conditional accesses of loop-invariant"
+             " locations"));
+
 // Experimentally, memory promotion carries less importance than sinking and
 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
 // compile time.
@@ -1819,6 +1824,12 @@ class LoopPromoter : public LoadAndStorePromoter {
   AAMDNodes AATags;
   ICFLoopSafetyInfo &SafetyInfo;
   bool CanInsertStoresInExitBlocks;
+  // This flag is used to make sure that every sunk conditional store
+  // instruction is executed conditionally within the exit blocks. It is
+  // initialized to 0 in the preheader and raised in every basic block
+  // containing a conditional store.
+  bool ConditionalAccessShouldBePromoted;
+  SSAUpdater &FlagSSAUpdater;
   ArrayRef<const Instruction *> Uses;
 
   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
@@ -1839,6 +1850,17 @@ class LoopPromoter : public LoadAndStorePromoter {
     return PN;
   }
 
+  void promoteConditionalAccess(BasicBlock *ExitBlock, Value *LiveInValue,
+                                Value *PtrToExitBB,
+                                BasicBlock::iterator InsertPos) {
+    Value *FlagValue = FlagSSAUpdater.GetValueInMiddleOfBlock(ExitBlock);
+    IRBuilder<> Builder(&*InsertPos);
+    Type *DataType = LiveInValue->getType();
+    Value *Ptr = Builder.CreatePointerCast(PtrToExitBB,
+                                           PointerType::getUnqual(DataType));
+    Builder.CreateConditionalStore(LiveInValue, Ptr, Alignment, FlagValue);
+  }
+
 public:
   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
                SmallVectorImpl<BasicBlock *> &LEB,
@@ -1846,13 +1868,17 @@ class LoopPromoter : public LoadAndStorePromoter {
                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
+               ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks,
+               bool ConditionalAccessShouldBePromoted,
+               SSAUpdater &FlagSSAUpdater)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
         LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
         LI(li), DL(std::move(dl)), Alignment(Alignment),
         UnorderedAtomic(UnorderedAtomic), AATags(AATags),
         SafetyInfo(SafetyInfo),
-        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
+        CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks),
+        ConditionalAccessShouldBePromoted(ConditionalAccessShouldBePromoted),
+        FlagSSAUpdater(FlagSSAUpdater), Uses(Insts) {}
 
   void insertStoresInLoopExitBlocks() {
     // Insert stores after in the loop exit blocks.  Each exit block gets a
@@ -1866,6 +1892,10 @@ class LoopPromoter : public LoadAndStorePromoter {
       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
       BasicBlock::iterator InsertPos = LoopInsertPts[i];
+      if (ConditionalAccessShouldBePromoted) {
+        promoteConditionalAccess(ExitBlock, LiveInValue, Ptr, InsertPos);
+        continue;
+      }
       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
       if (UnorderedAtomic)
         NewSI->setOrdering(AtomicOrdering::Unordered);
@@ -2042,6 +2072,9 @@ bool llvm::promoteLoopAccessesToScalars(
   bool SawNotAtomic = false;
   AAMDNodes AATags;
 
+  bool SawConditionalLIStore = false;
+  StringRef PointerOperandName;
+
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
   // If there are reads outside the promoted set, then promoting stores is
@@ -2119,6 +2152,12 @@ bool llvm::promoteLoopAccessesToScalars(
           if (StoreSafety == StoreSafetyUnknown)
             StoreSafety = StoreSafe;
           Alignment = std::max(Alignment, InstAlignment);
+        } else if (SetLicmConditionalAccessPromotion &&
+                   (!SawConditionalLIStore || (InstAlignment > Alignment))) {
+          SawConditionalLIStore = true;
+          if (PointerOperandName.empty())
+            PointerOperandName = Store->getPointerOperand()->getName();
+          Alignment = std::max(Alignment, InstAlignment);
         }
 
         // If a store dominates all exit blocks, it is safe to sink.
@@ -2199,6 +2238,29 @@ bool llvm::promoteLoopAccessesToScalars(
     // If we cannot hoist the load either, give up.
     return false;
 
+  const bool PromoteConditionalAccesses =
+      SetLicmConditionalAccessPromotion && SawConditionalLIStore;
+  bool ConditionalAccessShouldBePromoted = false;
+  SmallVector<PHINode *, 16> FlagPHIs;
+  SSAUpdater FlagSSAUpdater(&FlagPHIs);
+  if (StoreSafety == StoreSafetyUnknown && PromoteConditionalAccesses) {
+    ConditionalAccessShouldBePromoted = true;
+    // If we are allowed to promote conditional stores, store promotion is safe.
+    StoreSafety = StoreSafe;
+    Type *Int1Ty = Type::getInt1Ty(Preheader->getParent()->getContext());
+    FlagSSAUpdater.Initialize(Int1Ty, PointerOperandName.str() + ".flag");
+    // Initialize the flag with 0 in the preheader.
+    FlagSSAUpdater.AddAvailableValue(Preheader,
+                                     ConstantInt::get(Int1Ty,
+                                                      /* Value */ 0));
+    for (auto *UI : LoopUses)
+      if (StoreInst *ConditionalLIStore = dyn_cast<StoreInst>(UI))
+        // Raise the flag if a conditional store happened.
+        FlagSSAUpdater.AddAvailableValue(ConditionalLIStore->getParent(),
+                                         ConstantInt::get(Int1Ty,
+                                                          /* Value */ 1));
+  }
+
   // Lets do the promotion!
   if (StoreSafety == StoreSafe) {
     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
@@ -2228,7 +2290,8 @@ bool llvm::promoteLoopAccessesToScalars(
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
                         MSSAInsertPts, PIC, MSSAU, *LI,...
[truncated]

``````````
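The new `LowerConditionalStoreIntrinsic.cpp` falls within the truncated part of the diff, but given its purpose of expanding the intrinsic on targets without native support, the lowering presumably replaces each call with a branch around a plain store. A hand-written sketch of that expected shape (function and label names are illustrative, not from the patch):
```llvm
; Presumed result of lowering @llvm.conditional.store.i32.p0:
; branch on the condition and perform an ordinary store.
define void @lowered_sketch(ptr %p, i32 %v, i1 %cond) {
entry:
  br i1 %cond, label %do.store, label %exit

do.store:
  ; Executed only when %cond is true.
  store i32 %v, ptr %p, align 4
  br label %exit

exit:
  ret void
}
```
Since the pass is registered as `lower-conditional-store` in PassRegistry.def, the expansion can be exercised in isolation with `opt -passes=lower-conditional-store`.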

</details>


https://github.com/llvm/llvm-project/pull/93999


More information about the llvm-commits mailing list