[llvm] [LICM] Promote conditional, loop-invariant memory accesses to scalars with intrinsic (PR #93999)
Ivan Shumakov via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 11:03:32 PDT 2024
https://github.com/ii-sc created https://github.com/llvm/llvm-project/pull/93999
There is a missed opportunity to promote conditional store in the LICM pass. It has been implemented in this old patch:
https://reviews.llvm.org/D115244
That patch has a minor flaw: on some architectures the masked store intrinsic lowers to vector instructions. For example, on the RISC-V architecture this code
```cpp
int res;
void test(int * restrict a, int N) {
for (int i = 0; i < N; ++i)
if (a[i])
++res;
}
```
translates to the assembler with vector instructions:
```asm
.LBB0_2: # %for.cond.cleanup
andi a4, a4, 1
vsetivli zero, 1, e32, mf2, ta, ma
vmv.v.x v9, a4
vmv.v.x v8, a3
vsetvli zero, zero, e8, mf8, ta, ma
vmsne.vi v0, v9, 0
vse32.v v8, (a2), v0.t
```
which is unnecessary here.
I have implemented a `conditional_store` intrinsic for this patch in addition to the original one.
>From 59a0f70995a1815df45da0144ebdff92e20d259a Mon Sep 17 00:00:00 2001
From: Ivan Shumakov <ivan.shumakov at syntacore.com>
Date: Mon, 20 May 2024 12:05:39 +0300
Subject: [PATCH 1/2] [LLVM] Store promotion possibility
---
...conditional-store-promotion-possibility.ll | 66 +++++++++++++++++++
1 file changed, 66 insertions(+)
create mode 100644 llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
diff --git a/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll b/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
new file mode 100644
index 0000000000000..b694ea102ff09
--- /dev/null
+++ b/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -passes=licm < %s | FileCheck %s
+ at res = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local void @test(ptr noalias nocapture noundef readonly %a, i32 noundef signext %N) local_unnamed_addr #0 {
+ ; Preheader:
+ entry:
+ br label %for.cond
+
+ ; Loop:
+ for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+ for.body: ; preds = %for.cond
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ %tobool.not = icmp eq i32 %0, 0
+ br i1 %tobool.not, label %for.inc, label %if.then
+
+ if.then: ; preds = %for.body
+ %1 = load i32, ptr @res, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, ptr @res, align 4
+ br label %for.inc
+
+ for.inc: ; preds = %for.body, %if.then
+ %inc1 = add nuw nsw i32 %i.0, 1
+ br label %for.cond
+
+ ; Exit blocks
+ for.cond.cleanup: ; preds = %for.cond
+ ret void
+}
+
+; CHECK: entry:
+; CHECK: %res.promoted = load i32, ptr @res, align 4
+; CHECK: br label %for.cond
+
+; CHECK: for.cond:
+; CHECK: %inc3 = phi i32 [ %res.promoted, %entry ], [ %inc2, %for.inc ]
+; CHECK: %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+; CHECK: %cmp = icmp slt i32 %i.0, %N
+; CHECK: br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+; CHECK: for.body:
+; CHECK: %idxprom = zext i32 %i.0 to i64
+; CHECK: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+; CHECK: %0 = load i32, ptr %arrayidx, align 4
+; CHECK: %tobool.not = icmp eq i32 %0, 0
+; CHECK: br i1 %tobool.not, label %for.inc, label %if.then
+
+; CHECK: if.then:
+; CHECK: %inc = add nsw i32 %inc3, 1
+; CHECK: store i32 %inc, ptr @res, align 4
+; CHECK: br label %for.inc
+
+; CHECK: for.inc:
+; CHECK: %inc2 = phi i32 [ %inc, %if.then ], [ %inc3, %for.body ]
+; CHECK: %inc1 = add nuw nsw i32 %i.0, 1
+; CHECK: br label %for.cond
+
+; CHECK: for.cond.cleanup:
+ ; CHECK: ret void
+; CHECK: }
\ No newline at end of file
>From 28ead3aefcf9cf1e56fcd47f3c0f71f7c29422ec Mon Sep 17 00:00:00 2001
From: Ivan Shumakov <ivan.shumakov at syntacore.com>
Date: Mon, 8 Apr 2024 19:18:22 +0300
Subject: [PATCH 2/2] [LLVM] Conditional store promotion with corresponding
 intrinsic has been added
---
llvm/include/llvm/IR/IRBuilder.h | 4 +
llvm/include/llvm/IR/Intrinsics.td | 15 +++
llvm/include/llvm/InitializePasses.h | 1 +
llvm/include/llvm/LinkAllPasses.h | 1 +
llvm/include/llvm/Transforms/Scalar.h | 5 +
.../Scalar/LowerConditionalStoreIntrinsic.h | 30 +++++
llvm/lib/IR/IRBuilder.cpp | 16 +++
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassBuilderPipelines.cpp | 10 +-
llvm/lib/Passes/PassRegistry.def | 1 +
.../Target/AArch64/AArch64TargetMachine.cpp | 3 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +-
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 3 +
llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 +
llvm/lib/Transforms/Scalar/LICM.cpp | 69 ++++++++++-
.../Scalar/LowerConditionalStoreIntrinsic.cpp | 115 ++++++++++++++++++
llvm/lib/Transforms/Scalar/Scalar.cpp | 1 +
.../LICM/conditional-store-intrinsic.ll | 22 ++++
...conditional-store-promotion-possibility.ll | 18 ++-
.../LICM/promote-conditional-store-intr.ll | 69 +++++++++++
20 files changed, 380 insertions(+), 11 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
create mode 100644 llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp
create mode 100644 llvm/test/Transforms/LICM/conditional-store-intrinsic.ll
create mode 100644 llvm/test/Transforms/LICM/promote-conditional-store-intr.ll
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 40a9cf507248a..06fc284c7a824 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -818,6 +818,10 @@ class IRBuilderBase {
/// Create a call to llvm.threadlocal.address intrinsic.
CallInst *CreateThreadLocalAddress(Value *Ptr);
+ // Create a call to a Conditional Store intrinsic
+ CallInst *CreateConditionalStore(Value *Val, Value *Ptr, Align Alignment,
+ Value *Condition);
+
/// Create a call to Masked Load intrinsic
CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
Value *PassThru = nullptr, const Twine &Name = "");
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..95a9f6cc04de2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2322,6 +2322,21 @@ def int_vp_is_fpclass:
llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
+//===-------------------------- Conditional Intrinsics --------------------===//
+//
+
+def int_conditional_store:
+ DefaultAttrsIntrinsic</*ret_types*/[],
+ /*param_types*/[/*Val*/llvm_any_ty,
+ /*Ptr*/llvm_anyptr_ty,
+ /*Alignment*/llvm_i32_ty,
+ /*Condition*/llvm_i1_ty],
+ /*intr_properties*/[IntrWriteMem,
+ IntrArgMemOnly,
+ IntrWillReturn,
+ /*Alignment is a constant*/ImmArg<ArgIndex<2>>,
+ NoCapture<ArgIndex<1>>]>;
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_load:
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9ba75d491c1c9..c36f035e00bdc 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -169,6 +169,7 @@ void initializeLoopUnrollPass(PassRegistry&);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
+void initializeLowerConditionalStoreIntrinsicLegacyPass(PassRegistry &);
void initializeLowerGlobalDtorsLegacyPassPass(PassRegistry &);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 30e7c22f31460..042a0e8768380 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -90,6 +90,7 @@ namespace {
(void) llvm::createLoopStrengthReducePass();
(void) llvm::createLoopUnrollPass();
(void) llvm::createLowerConstantIntrinsicsPass();
+ (void)llvm::createLowerConditionalStoreIntrinsicPass();
(void) llvm::createLowerGlobalDtorsLegacyPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index f74a49785e11b..962f5c75a01e6 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -143,6 +143,11 @@ Pass *createMergeICmpsLegacyPass();
FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
extern char &InferAddressSpacesID;
+//===----------------------------------------------------------------------===//
+//
+// Lower conditional store intrinsic
+FunctionPass *createLowerConditionalStoreIntrinsicPass();
+
//===----------------------------------------------------------------------===//
//
// TLSVariableHoist - This pass reduce duplicated TLS address call.
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
new file mode 100644
index 0000000000000..f3b6f6ce2a185
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h
@@ -0,0 +1,30 @@
+//===- LowerConditionalStoreIntrinsic.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass for early lowering of conditional store.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONDITIONALSTOREINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERCONDITIONALSTOREINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+/// Pass that lowers llvm.conditional.store intrinsics into an explicit
+/// branch around a plain store (see LowerConditionalStoreIntrinsic.cpp).
+struct LowerConditionalStoreIntrinsicPass
+    : PassInfoMixin<LowerConditionalStoreIntrinsicPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERCONDITIONALSTOREINTRINSIC_H
\ No newline at end of file
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b32799355d692..4a0ba04a86e24 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -566,6 +566,22 @@ Instruction *IRBuilderBase::CreateNoAliasScopeDeclaration(Value *Scope) {
return CreateCall(FnIntrinsic, {Scope});
}
+/// Create a call to the llvm.conditional.store intrinsic.
+/// \p Val - data to be stored
+/// \p Ptr - base pointer for the store
+/// \p Alignment - alignment of the destination location
+/// \p Condition - i1 value that indicates whether the store should be
+///                performed
+CallInst *IRBuilderBase::CreateConditionalStore(Value *Val, Value *Ptr,
+                                                Align Alignment,
+                                                Value *Condition) {
+  auto *PtrTy = cast<PointerType>(Ptr->getType());
+  Type *DataTy = Val->getType();
+  // The intrinsic is overloaded on both the stored value type and the
+  // pointer type; both participate in the mangled intrinsic name.
+  Type *OverloadedTypes[] = {DataTy, PtrTy};
+  Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Condition};
+  // CreateMaskedIntrinsic is only a name-mangling helper; it is not
+  // specific to masked (vector) intrinsics.
+  return CreateMaskedIntrinsic(Intrinsic::conditional_store, Ops,
+                               OverloadedTypes);
+}
+
/// Create a call to a Masked Load intrinsic.
/// \p Ty - vector type to load
/// \p Ptr - base pointer for the load
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..fe6b75c3b8edd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -232,6 +232,7 @@
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1892e16a06528..382c57c0375e0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -110,6 +110,7 @@
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
@@ -499,7 +500,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -691,7 +692,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -744,7 +745,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
-
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
FPM.addPass(CoroElidePass());
invokeScalarOptimizerLateEPCallbacks(FPM, Level);
@@ -1279,6 +1280,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
ExtraPasses.addPass(InstCombinePass());
FPM.addPass(std::move(ExtraPasses));
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
}
// Now that we've formed fast to execute loop structures, we do further
@@ -1354,6 +1356,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ FPM.addPass(LowerConditionalStoreIntrinsicPass());
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1950,6 +1953,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
/*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ MainFPM.addPass(LowerConditionalStoreIntrinsicPass());
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..04d01f7d7310e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -385,6 +385,7 @@ FUNCTION_PASS("lower-allow-check", LowerAllowCheckPass())
FUNCTION_PASS("lower-atomic", LowerAtomicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-conditional-store", LowerConditionalStoreIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-invoke", LowerInvokePass())
FUNCTION_PASS("lower-switch", LowerSwitchPass())
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 945ab5cf1f303..db8eb7a952335 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -619,6 +619,9 @@ void AArch64PassConfig::addIRPasses() {
// Do loop invariant code motion in case part of the lowered result is
// invariant.
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target
+ addPass(createLowerConditionalStoreIntrinsicPass());
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbbfe34a63863..a865c81d430e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1046,8 +1046,12 @@ void AMDGPUPassConfig::addIRPasses() {
// Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
// have expanded.
- if (TM.getOptLevel() > CodeGenOptLevel::Less)
+ if (TM.getOptLevel() > CodeGenOptLevel::Less) {
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target
+ addPass(createLowerConditionalStoreIntrinsicPass());
+ }
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 714cf69827a1e..3650f4c2121e6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -485,6 +485,9 @@ void PPCPassConfig::addIRPasses() {
// Do loop invariant code motion in case part of the lowered result is
// invariant.
addPass(createLICMPass());
+ // This pass expands conditional store intrinsics,
+ // which are not supported by the target
+ addPass(createLowerConditionalStoreIntrinsicPass());
}
TargetPassConfig::addIRPasses();
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4..bdecc839729c9 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_component_library(LLVMScalarOpts
LoopUnrollAndJamPass.cpp
LoopVersioningLICM.cpp
LowerAtomicPass.cpp
+ LowerConditionalStoreIntrinsic.cpp
LowerConstantIntrinsics.cpp
LowerExpectIntrinsic.cpp
LowerGuardIntrinsic.cpp
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 5eccf7b4adb65..009b5f3bb7bcc 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -157,6 +157,11 @@ cl::opt<unsigned> llvm::SetLicmMssaOptCap(
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
+cl::opt<bool> SetLicmConditionalAccessPromotion(
+ "licm-conditional-access-promotion", cl::Hidden, cl::init(true),
+ cl::desc("Enable promotion of conditional accesses of loop-invariant"
+ " locations"));
+
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
@@ -1819,6 +1824,12 @@ class LoopPromoter : public LoadAndStorePromoter {
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
bool CanInsertStoresInExitBlocks;
+ // This flag will be used to make sure that every sunk conditional store
+ // instruction is executed conditionally within the exit blocks. In the
+ // preheader, it is initialized to false. In every basic block containing a
+ // conditional store it is set to true.
+ bool ConditionalAccessShouldBePromoted;
+ SSAUpdater &FlagSSAUpdater;
ArrayRef<const Instruction *> Uses;
// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
@@ -1839,6 +1850,17 @@ class LoopPromoter : public LoadAndStorePromoter {
return PN;
}
+ void promoteConditionalAccess(BasicBlock *ExitBlock, Value *LiveInValue,
+ Value *PtrToExitBB,
+ BasicBlock::iterator InsertPos) {
+ Value *FlagValue = FlagSSAUpdater.GetValueInMiddleOfBlock(ExitBlock);
+ IRBuilder<> Builder(&*InsertPos);
+ Type *DataType = LiveInValue->getType();
+ Value *Ptr = Builder.CreatePointerCast(PtrToExitBB,
+ PointerType::getUnqual(DataType));
+ Builder.CreateConditionalStore(LiveInValue, Ptr, Alignment, FlagValue);
+ }
+
public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
SmallVectorImpl<BasicBlock *> &LEB,
@@ -1846,13 +1868,17 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
+ ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks,
+ bool ConditionalAccessShouldBePromoted,
+ SSAUpdater &FlagSSAUpdater)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
LI(li), DL(std::move(dl)), Alignment(Alignment),
UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo),
- CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
+ CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks),
+ ConditionalAccessShouldBePromoted(ConditionalAccessShouldBePromoted),
+ FlagSSAUpdater(FlagSSAUpdater), Uses(Insts) {}
void insertStoresInLoopExitBlocks() {
// Insert stores after in the loop exit blocks. Each exit block gets a
@@ -1866,6 +1892,10 @@ class LoopPromoter : public LoadAndStorePromoter {
LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
BasicBlock::iterator InsertPos = LoopInsertPts[i];
+ if (ConditionalAccessShouldBePromoted) {
+ promoteConditionalAccess(ExitBlock, LiveInValue, Ptr, InsertPos);
+ continue;
+ }
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
@@ -2042,6 +2072,9 @@ bool llvm::promoteLoopAccessesToScalars(
bool SawNotAtomic = false;
AAMDNodes AATags;
+ bool SawConditionalLIStore = false;
+ StringRef PointerOperandName;
+
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
// If there are reads outside the promoted set, then promoting stores is
@@ -2119,6 +2152,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (StoreSafety == StoreSafetyUnknown)
StoreSafety = StoreSafe;
Alignment = std::max(Alignment, InstAlignment);
+ } else if (SetLicmConditionalAccessPromotion &&
+ (!SawConditionalLIStore || (InstAlignment > Alignment))) {
+ SawConditionalLIStore = true;
+ if (PointerOperandName.empty())
+ PointerOperandName = Store->getPointerOperand()->getName();
+ Alignment = std::max(Alignment, InstAlignment);
}
// If a store dominates all exit blocks, it is safe to sink.
@@ -2199,6 +2238,29 @@ bool llvm::promoteLoopAccessesToScalars(
// If we cannot hoist the load either, give up.
return false;
+ const bool PromoteConditionalAccesses =
+ SetLicmConditionalAccessPromotion && SawConditionalLIStore;
+ bool ConditionalAccessShouldBePromoted = false;
+ SmallVector<PHINode *, 16> FlagPHIs;
+ SSAUpdater FlagSSAUpdater(&FlagPHIs);
+ if (StoreSafety == StoreSafetyUnknown && PromoteConditionalAccesses) {
+ ConditionalAccessShouldBePromoted = true;
+ // If we are allowed to promote conditional stores, store promotion is safe
+ StoreSafety = StoreSafe;
+ Type *Int1Ty = Type::getInt1Ty(Preheader->getParent()->getContext());
+ FlagSSAUpdater.Initialize(Int1Ty, PointerOperandName.str() + ".flag");
+ // Initialize the flag with 0 in the preheader.
+ FlagSSAUpdater.AddAvailableValue(Preheader,
+ ConstantInt::get(Int1Ty,
+ /* Value */ 0));
+ for (auto *UI : LoopUses)
+ if (StoreInst *ConditionalLIStore = dyn_cast<StoreInst>(UI))
+ // Raise the flag if a conditional store happened.
+ FlagSSAUpdater.AddAvailableValue(ConditionalLIStore->getParent(),
+ ConstantInt::get(Int1Ty,
+ /* Value */ 1));
+ }
+
// Lets do the promotion!
if (StoreSafety == StoreSafe) {
LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
@@ -2228,7 +2290,8 @@ bool llvm::promoteLoopAccessesToScalars(
LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
SawUnorderedAtomic, AATags, *SafetyInfo,
- StoreSafety == StoreSafe);
+ StoreSafety == StoreSafe,
+ ConditionalAccessShouldBePromoted, FlagSSAUpdater);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
diff --git a/llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp
new file mode 100644
index 0000000000000..1a69e053e11d2
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LowerConditionalStoreIntrinsic.cpp
@@ -0,0 +1,115 @@
+//===- LowerConditionalStoreIntrinsic.cpp -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerConditionalStoreIntrinsic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lower-cond-store-intrinsic"
+
+// Conditional store intrinsic removal:
+// block:
+//   llvm.conditional.store.*(Val, Ptr, Condition)
+//          |
+//          V
+// block:
+//   br i1 %Condition, label %cond.store, label %block.remaining
+// cond.store:
+//   store * Val, Ptr
+//   br label %block.remaining
+// block.remaining:
+
+// Return true if \p Instr is a direct call to the llvm.conditional.store
+// intrinsic.
+static bool isCondStoreIntr(Instruction &Instr) {
+  CallInst *CI = dyn_cast<CallInst>(&Instr);
+  if (!CI)
+    return false;
+
+  // getCalledFunction() is null for indirect calls.
+  Function *Fn = CI->getCalledFunction();
+  return Fn && Fn->getIntrinsicID() == Intrinsic::conditional_store;
+}
+
+// Lower one llvm.conditional.store call: split the block, branch on the
+// condition to a new block holding a plain (aligned) store, then delete
+// the intrinsic call.
+static void lowerCondStoreIntr(Instruction &Instr, BasicBlock &BB) {
+  LLVM_DEBUG(dbgs() << "Found basic block with conditional store: "
+                    << BB.getName() << "\n");
+  // Operand layout: (Val, Ptr, Alignment, Condition); Alignment is an
+  // immarg, so it is always a ConstantInt.
+  auto *Val = Instr.getOperand(0);
+  auto *Ptr = Instr.getOperand(1);
+  auto *AlignmentVal = dyn_cast<ConstantInt>(Instr.getOperand(2));
+  // Check the operand BEFORE dereferencing it.
+  assert(AlignmentVal && "Invalid intrinsic operands");
+  auto Alignment = MaybeAlign(AlignmentVal->getValue().getLimitedValue());
+  auto *Cond = Instr.getOperand(3);
+
+  // ThenBlock is the terminator of the newly created conditional block.
+  Instruction *ThenBlock =
+      SplitBlockAndInsertIfThen(Cond, &Instr, /*Unreachable*/ false);
+
+  IRBuilder<> IB(ThenBlock);
+  IB.CreateAlignedStore(Val, Ptr, Alignment);
+
+  Instr.eraseFromParent();
+}
+
+// Scan every block of \p F and expand all conditional store intrinsics.
+// Returns true if the function was modified.
+static bool lowerCondStoreIntrinsicForFunc(Function &F) {
+  bool Modified = false;
+
+  // Iterate in reverse with an early-increment range: lowering splits the
+  // current block, so the iterator must be advanced before mutation.
+  for (BasicBlock &BB : F)
+    for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(BB))) {
+      if (!isCondStoreIntr(I))
+        continue;
+      lowerCondStoreIntr(I, BB);
+      Modified = true;
+    }
+  return Modified;
+}
+
+PreservedAnalyses
+LowerConditionalStoreIntrinsicPass::run(Function &F,
+                                        FunctionAnalysisManager &) {
+  // Lowering rewrites the CFG, so no analyses survive a change.
+  return lowerCondStoreIntrinsicForFunc(F) ? PreservedAnalyses::none()
+                                           : PreservedAnalyses::all();
+}
+
+namespace {
+
+/// Legacy pass manager wrapper around LowerConditionalStoreIntrinsicPass.
+class LowerConditionalStoreIntrinsicLegacy : public FunctionPass {
+  LowerConditionalStoreIntrinsicPass Impl;
+
+public:
+  static char ID;
+  LowerConditionalStoreIntrinsicLegacy() : FunctionPass(ID) {
+    initializeLowerConditionalStoreIntrinsicLegacyPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    // Don't skip optnone functions; conditional store intrinsics must always
+    // be lowered before instruction selection.
+    FunctionAnalysisManager DummyFAM;
+    auto PA = Impl.run(F, DummyFAM);
+    return !PA.areAllPreserved();
+  }
+};
+} // namespace
+
+char LowerConditionalStoreIntrinsicLegacy::ID = 0;
+INITIALIZE_PASS(LowerConditionalStoreIntrinsicLegacy, "lower-conditional-store",
+                "Lower conditional store", false, false)
+
+// Factory for the legacy pass; referenced from target pass pipelines
+// (AArch64/AMDGPU/PPC addIRPasses) and LinkAllPasses.h.
+FunctionPass *llvm::createLowerConditionalStoreIntrinsicPass() {
+  return new LowerConditionalStoreIntrinsicLegacy();
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 400b15284c1b8..420f45bf4a570 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -33,6 +33,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopUnrollPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerConstantIntrinsicsPass(Registry);
+ initializeLowerConditionalStoreIntrinsicLegacyPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
diff --git a/llvm/test/Transforms/LICM/conditional-store-intrinsic.ll b/llvm/test/Transforms/LICM/conditional-store-intrinsic.ll
new file mode 100644
index 0000000000000..7fe111eeb641d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/conditional-store-intrinsic.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -passes=lower-conditional-store < %s | FileCheck %s
+define void @foo(ptr %p, i64 %val, i1 %cond) {
+ call void @llvm.conditional.store.i64.p0(i64 %val, ptr %p, i32 4, i1 %cond)
+ call void @llvm.conditional.store.i64.p0(i64 %val, ptr %p, i32 4, i1 %cond)
+ ret void
+}
+
+declare void @llvm.conditional.store.i64.p0(i64, ptr nocapture, i32 immarg, i1)
+
+; CHECK: define void @foo(ptr %p, i64 %val, i1 %cond) {
+; CHECK: br i1 %cond, label %1, label %2
+; CHECK: 1:
+; CHECK: store i64 %val, ptr %p, align 4
+; CHECK: br label %2
+; CHECK: 2:
+; CHECK: br i1 %cond, label %3, label %4
+; CHECK: 3:
+; CHECK: store i64 %val, ptr %p, align 4
+; CHECK: br label %4
+; CHECK: 4:
+; CHECK: ret void
+; CHECK: }
\ No newline at end of file
diff --git a/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll b/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
index b694ea102ff09..809565ab7fe2b 100644
--- a/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
+++ b/llvm/test/Transforms/LICM/conditional-store-promotion-possibility.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -passes=licm < %s | FileCheck %s
+; RUN: opt -S -passes=licm -licm-conditional-access-promotion=true < %s > %t
+; RUN: opt -S -passes=lower-conditional-store < %t | FileCheck %s
@res = dso_local local_unnamed_addr global i32 0, align 4
define dso_local void @test(ptr noalias nocapture noundef readonly %a, i32 noundef signext %N) local_unnamed_addr #0 {
@@ -39,6 +40,7 @@ define dso_local void @test(ptr noalias nocapture noundef readonly %a, i32 nound
; CHECK: br label %for.cond
; CHECK: for.cond:
+; CHECK: %res.flag4 = phi i1 [ false, %entry ], [ %res.flag, %for.inc ]
; CHECK: %inc3 = phi i32 [ %res.promoted, %entry ], [ %inc2, %for.inc ]
; CHECK: %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
; CHECK: %cmp = icmp slt i32 %i.0, %N
@@ -53,14 +55,22 @@ define dso_local void @test(ptr noalias nocapture noundef readonly %a, i32 nound
; CHECK: if.then:
; CHECK: %inc = add nsw i32 %inc3, 1
-; CHECK: store i32 %inc, ptr @res, align 4
; CHECK: br label %for.inc
; CHECK: for.inc:
+; CHECK: %res.flag = phi i1 [ true, %if.then ], [ %res.flag4, %for.body ]
; CHECK: %inc2 = phi i32 [ %inc, %if.then ], [ %inc3, %for.body ]
; CHECK: %inc1 = add nuw nsw i32 %i.0, 1
; CHECK: br label %for.cond
; CHECK: for.cond.cleanup:
- ; CHECK: ret void
-; CHECK: }
\ No newline at end of file
+; CHECK: %res.flag4.lcssa = phi i1 [ %res.flag4, %for.cond ]
+; CHECK: %inc3.lcssa = phi i32 [ %inc3, %for.cond ]
+; CHECK: br i1 %res.flag4.lcssa, label %1, label %2
+
+; CHECK: 1:
+; CHECK: store i32 %inc3.lcssa, ptr @res, align 4
+; CHECK: br label %2
+
+; CHECK: 2:
+; CHECK: ret void
\ No newline at end of file
diff --git a/llvm/test/Transforms/LICM/promote-conditional-store-intr.ll b/llvm/test/Transforms/LICM/promote-conditional-store-intr.ll
new file mode 100644
index 0000000000000..f805b6b674e2e
--- /dev/null
+++ b/llvm/test/Transforms/LICM/promote-conditional-store-intr.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -passes=licm -licm-conditional-access-promotion=true < %s | FileCheck %s
+ at res = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local void @test(ptr noalias nocapture noundef readonly %a, i32 noundef signext %N) local_unnamed_addr #0 {
+ ; Preheader:
+ entry:
+ br label %for.cond
+
+ ; Loop:
+ for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+ for.body: ; preds = %for.cond
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ %tobool.not = icmp eq i32 %0, 0
+ br i1 %tobool.not, label %for.inc, label %if.then
+
+ if.then: ; preds = %for.body
+ %1 = load i32, ptr @res, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, ptr @res, align 4
+ br label %for.inc
+
+ for.inc: ; preds = %for.body, %if.then
+ %inc1 = add nuw nsw i32 %i.0, 1
+ br label %for.cond
+
+ ; Exit blocks
+ for.cond.cleanup: ; preds = %for.cond
+ ret void
+}
+
+; CHECK: entry:
+; CHECK: %res.promoted = load i32, ptr @res, align 4
+; CHECK: br label %for.cond
+
+; CHECK: for.cond:
+; CHECK: %res.flag4 = phi i1 [ false, %entry ], [ %res.flag, %for.inc ]
+; CHECK: %inc3 = phi i32 [ %res.promoted, %entry ], [ %inc2, %for.inc ]
+; CHECK: %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+; CHECK: %cmp = icmp slt i32 %i.0, %N
+; CHECK: br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+; CHECK: for.body:
+; CHECK: %idxprom = zext i32 %i.0 to i64
+; CHECK: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+; CHECK: %0 = load i32, ptr %arrayidx, align 4
+; CHECK: %tobool.not = icmp eq i32 %0, 0
+; CHECK: br i1 %tobool.not, label %for.inc, label %if.then
+
+; CHECK: if.then:
+; CHECK: %inc = add nsw i32 %inc3, 1
+; CHECK: br label %for.inc
+
+; CHECK: for.inc:
+; CHECK: %res.flag = phi i1 [ true, %if.then ], [ %res.flag4, %for.body ]
+; CHECK: %inc2 = phi i32 [ %inc, %if.then ], [ %inc3, %for.body ]
+; CHECK: %inc1 = add nuw nsw i32 %i.0, 1
+; CHECK: br label %for.cond
+
+; CHECK: for.cond.cleanup:
+; CHECK: %res.flag4.lcssa = phi i1 [ %res.flag4, %for.cond ]
+; CHECK: %inc3.lcssa = phi i32 [ %inc3, %for.cond ]
+; CHECK: call void @llvm.conditional.store.i32.p0(i32 %inc3.lcssa, ptr @res, i32 4, i1 %res.flag4.lcssa)
+; CHECK: ret void
\ No newline at end of file
More information about the llvm-commits
mailing list