[llvm] [AggressiveInstCombine] Implement store merge optimization (PR #147540)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 08:47:06 PDT 2025
================
@@ -842,6 +842,141 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return true;
}
+/// One slice of a wider integer being written to memory: the ValWidth bits of
+/// Val that begin at bit ValOffset, stored at address PtrBase + PtrOffset.
+struct PartStore {
+  /// Pointer that PtrOffset is measured from.
+  Value *PtrBase;
+  /// Constant offset from PtrBase; ordered as a signed quantity.
+  APInt PtrOffset;
+  /// Wide source value this slice is taken from.
+  Value *Val;
+  /// Index of the first bit of Val covered by this slice.
+  uint64_t ValOffset;
+  /// Number of bits of Val covered by this slice.
+  uint64_t ValWidth;
+  /// The store instruction that writes this slice.
+  StoreInst *Store;
+
+  /// Two parts are compatible only when they slice the same source value and
+  /// are addressed relative to the same base pointer.
+  bool isCompatibleWith(const PartStore &Other) const {
+    if (Val != Other.Val)
+      return false;
+    return PtrBase == Other.PtrBase;
+  }
+
+  /// Orders parts by ascending signed pointer offset; no other field takes
+  /// part in the comparison.
+  bool operator<(const PartStore &Other) const {
+    return Other.PtrOffset.sgt(PtrOffset);
+  }
+};
+
+/// Try to describe \p I as a PartStore.
+///
+/// Succeeds only for a simple store of an integer whose type size equals its
+/// store size, where the stored value is either trunc(lshr(Val, C)) with a
+/// constant C, or a plain trunc(Val). Returns std::nullopt in every other
+/// case.
+static std::optional<PartStore> matchPartStore(Instruction &I,
+                                               const DataLayout &DL) {
+  auto *SI = dyn_cast<StoreInst>(&I);
+  if (!SI)
+    return std::nullopt;
+  if (!SI->isSimple())
+    return std::nullopt;
+
+  Value *Stored = SI->getValueOperand();
+  Type *Ty = Stored->getType();
+  const bool IsWholeInt = Ty->isIntegerTy() && DL.typeSizeEqualsStoreSize(Ty);
+  if (!IsWholeInt)
+    return std::nullopt;
+
+  // The shifted form is tried first. ShiftAmt starts at zero so that the
+  // plain-trunc form is recorded as a slice beginning at bit 0.
+  Value *Src;
+  uint64_t ShiftAmt = 0;
+  const bool IsSlice =
+      match(Stored, m_Trunc(m_LShr(m_Value(Src), m_ConstantInt(ShiftAmt)))) ||
+      match(Stored, m_Trunc(m_Value(Src)));
+  if (!IsSlice)
+    return std::nullopt;
+
+  // Split the store address into a base pointer plus an accumulated constant
+  // offset, sized to the index type of the pointer.
+  Value *Addr = SI->getPointerOperand();
+  APInt Offset(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
+  Value *Base = Addr->stripAndAccumulateConstantOffsets(
+      DL, Offset, /*AllowNonInbounds=*/true);
+
+  const uint64_t Width = Ty->getPrimitiveSizeInBits();
+  return PartStore{Base, Offset, Src, ShiftAmt, Width, SI};
+}
+
+static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
+ const DataLayout &DL, TargetTransformInfo &TTI) {
+ if (Parts.size() < 2)
+ return false;
+
+ // We now have multiple parts of the same value stored to the same pointer.
+ // Sort the parts by pointer offset, and make sure they are consistent with
+ // the value offsets. Also check that the value is fully covered without
+ // overlaps.
+ // FIXME: We could support merging stores for only part of the value here.
+ llvm::sort(Parts);
+ int64_t LastEndOffsetFromFirst = 0;
+ const PartStore &First = Parts[0];
+ for (const PartStore &Part : Parts) {
+ APInt PtrOffsetFromFirst = Part.PtrOffset - First.PtrOffset;
+ int64_t ValOffsetFromFirst = Part.ValOffset - First.ValOffset;
+ if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
+ LastEndOffsetFromFirst != ValOffsetFromFirst)
+ return false;
+ LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
+ }
+
+ // Check whether combining the stores is profitable.
+ // FIXME: We could generate smaller stores if we can't produce a large one.
+ LLVMContext &Ctx = First.Store->getContext();
+ Type *NewTy = Type::getIntNTy(Ctx, LastEndOffsetFromFirst);
+ unsigned Fast = 0;
+ if (!TTI.isTypeLegal(NewTy) ||
+ !TTI.allowsMisalignedMemoryAccesses(Ctx, LastEndOffsetFromFirst,
+ First.Store->getPointerAddressSpace(),
+ First.Store->getAlign(), &Fast) ||
+ !Fast)
+ return false;
+
+ // Generate the combined store.
+ IRBuilder<> Builder(First.Store);
+ Value *Val = First.Val;
+ if (First.ValOffset != 0)
+ Val = Builder.CreateLShr(Val, First.ValOffset);
+ Val = Builder.CreateTrunc(Val, NewTy);
+ Value *Ptr = First.PtrBase;
+ if (First.PtrOffset != 0)
+ Ptr = Builder.CreateInBoundsPtrAdd(Ptr, Builder.getInt(First.PtrOffset));
----------------
dtcxzyw wrote:
Use `First.Store->getPointerOperand()` directly instead of rebuilding the pointer from `First.PtrBase` and `First.PtrOffset`.
https://github.com/llvm/llvm-project/pull/147540
More information about the llvm-commits
mailing list