[llvm] [LSV] Insert casts to vectorize mismatched types (PR #134436)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 13 01:33:17 PDT 2025
================
@@ -1310,6 +1315,116 @@ std::optional<APInt> Vectorizer::getConstantOffsetSelects(
return std::nullopt;
}
+void Vectorizer::insertCastsToMergeClasses(EquivalenceClassMap &EQClasses) {
+ if (EQClasses.size() < 2)
+ return;
+
+ // For each class, determine whether all instructions are of integer, FP or
+ // pointer type. This information helps us determine the type the
+ // instructions should be cast to.
+ MapVector<EqClassKey, Bitset<3>> ClassAllTy;
+ for (auto C : EQClasses) {
+ if (all_of(EQClasses[C.first], [](Instruction *I) {
+ return I->getType()->isIntOrIntVectorTy();
+ }))
+ ClassAllTy[C.first].set(0);
+ else if (all_of(EQClasses[C.first], [](Instruction *I) {
+ return I->getType()->isFPOrFPVectorTy();
+ }))
+ ClassAllTy[C.first].set(1);
+ else if (all_of(EQClasses[C.first], [](Instruction *I) {
+ return I->getType()->isPtrOrPtrVectorTy();
+ }))
+ ClassAllTy[C.first].set(2);
+ }
+
+ // Loop over all equivalence classes and try to merge them. Keep track of
+ // classes that are merged into others.
+ DenseSet<EqClassKey> ClassesToErase;
+ for (auto EC1 : EQClasses) {
+ for (auto EC2 : EQClasses) {
+ // Skip if EC2 was already merged before, EC1 follows EC2 in the
+ // collection or EC1 is the same as EC2.
+ if (ClassesToErase.contains(EC2.first) || EC1 <= EC2 ||
+ EC1.first == EC2.first)
+ continue;
+
+ auto [Ptr1, AS1, TySize1, IsLoad1] = EC1.first;
+ auto [Ptr2, AS2, TySize2, IsLoad2] = EC2.first;
+
+ // Attempt to merge EC2 into EC1. Skip if the pointers, address spaces or
+ // whether the leader instruction is a load/store are different. Also skip
+ // if the scalar bitwidth of the first equivalence class is smaller than
+ // the second one to avoid reconsidering the same equivalence class pair.
+ if (Ptr1 != Ptr2 || AS1 != AS2 || IsLoad1 != IsLoad2 || TySize1 < TySize2)
+ continue;
+
+ // Ensure all instructions in EC2 can be bitcasted into NewTy.
+ /// TODO: NewTyBits is needed because structured bindings cannot be
+ /// captured by a lambda until C++20.
+ auto NewTyBits = std::get<2>(EC1.first);
+ if (any_of(EC2.second, [&](Instruction *I) {
+ return DL.getTypeSizeInBits(getLoadStoreType(I)) != NewTyBits;
+ }))
+ continue;
+
+ // Create a new type for the equivalence class.
+ auto &Ctx = EC2.second[0]->getContext();
+ Type *NewTy = Type::getIntNTy(EC2.second[0]->getContext(), NewTyBits);
+ if (ClassAllTy[EC1.first].test(1) && ClassAllTy[EC2.first].test(1)) {
+ if (NewTyBits == 16)
+ NewTy = Type::getHalfTy(Ctx);
+ else if (NewTyBits == 32)
+ NewTy = Type::getFloatTy(Ctx);
+ else if (NewTyBits == 64)
+ NewTy = Type::getDoubleTy(Ctx);
+ } else if (ClassAllTy[EC1.first].test(2) &&
+ ClassAllTy[EC2.first].test(2)) {
+ NewTy = PointerType::get(Ctx, AS2);
+ }
+
+ for (auto *Inst : EC2.second) {
+ auto *Ptr = getLoadStorePointerOperand(Inst);
+ auto *OrigTy = Inst->getType();
+ if (OrigTy == NewTy)
+ continue;
+ if (auto *LI = dyn_cast<LoadInst>(Inst)) {
+ Builder.SetInsertPoint(LI->getIterator());
+ auto *NewLoad = Builder.CreateLoad(NewTy, Ptr);
+ auto *Cast = Builder.CreateBitOrPointerCast(
+ NewLoad, OrigTy, NewLoad->getName() + ".cast");
+ LI->replaceAllUsesWith(Cast);
+ LI->eraseFromParent();
+ EQClasses[EC1.first].emplace_back(NewLoad);
+ } else {
+ auto *SI = cast<StoreInst>(Inst);
+ Builder.SetInsertPoint(SI->getIterator());
+ auto *Cast = Builder.CreateBitOrPointerCast(
+ SI->getValueOperand(), NewTy,
+ SI->getValueOperand()->getName() + ".cast");
+ auto *NewStore = Builder.CreateStore(
+ Cast, getLoadStorePointerOperand(SI), SI->isVolatile());
+ SI->eraseFromParent();
+ EQClasses[EC1.first].emplace_back(NewStore);
+ }
+ }
+
+ // Sort the instructions in the equivalence class by their order in the
+ // basic block. This is important to ensure that the instructions are
+ // vectorized in the correct order.
+ std::sort(EQClasses[EC1.first].begin(), EQClasses[EC1.first].end(),
+ [](Instruction *A, Instruction *B) {
----------------
arsenm wrote:
const (i.e. the comparator lambda's parameters should be `const Instruction *`)
https://github.com/llvm/llvm-project/pull/134436
More information about the llvm-commits mailing list