[llvm] [TLI] Pass replace-with-veclib works with Scalable Vectors. (PR #73642)
Maciej Gabka via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 19 08:36:31 PST 2023
================
@@ -38,138 +42,152 @@ STATISTIC(NumTLIFuncDeclAdded,
STATISTIC(NumFuncUsedAdded,
"Number of functions added to `llvm.compiler.used`");
-static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
- Module *M = CI.getModule();
-
- Function *OldFunc = CI.getCalledFunction();
-
- // Check if the vector library function is already declared in this module,
- // otherwise insert it.
+/// Returns a vector Function that it adds to the Module \p M. When an \p
+/// ScalarFunc is not null, it copies its attributes to the newly created
+/// Function.
+Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
+ Function *ScalarFunc, const StringRef TLIName) {
Function *TLIFunc = M->getFunction(TLIName);
if (!TLIFunc) {
- TLIFunc = Function::Create(OldFunc->getFunctionType(),
- Function::ExternalLinkage, TLIName, *M);
- TLIFunc->copyAttributesFrom(OldFunc);
+ TLIFunc =
+ Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
+ if (ScalarFunc)
+ TLIFunc->copyAttributesFrom(ScalarFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
<< TLIName << "` of type `" << *(TLIFunc->getType())
<< "` to module.\n");
++NumTLIFuncDeclAdded;
-
- // Add the freshly created function to llvm.compiler.used,
- // similar to as it is done in InjectTLIMappings
+ // Add the freshly created function to llvm.compiler.used, similar to as it
+ // is done in InjectTLIMappings.
appendToCompilerUsed(*M, {TLIFunc});
-
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
<< "` to `@llvm.compiler.used`.\n");
++NumFuncUsedAdded;
}
+ return TLIFunc;
+}
- // Replace the call to the vector intrinsic with a call
- // to the corresponding function from the vector library.
+/// Replace the call to the vector intrinsic ( \p FuncToReplace ) with a call to
+/// the corresponding function from the vector library ( \p TLIFunc ).
+static void replaceWithTLIFunction(CallInst &CI, VFInfo &Info,
+ Function *TLIVecFunc) {
IRBuilder<> IRBuilder(&CI);
SmallVector<Value *> Args(CI.args());
+ if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
+ if (Args.size() == TLIVecFunc->getFunctionType()->getNumParams())
+ static_assert(true && "mask was already in place");
+
+ auto *MaskTy =
+ VectorType::get(Type::getInt1Ty(CI.getContext()), Info.Shape.VF);
+ Args.insert(Args.begin() + OptMaskpos.value(),
+ Constant::getAllOnesValue(MaskTy));
+ }
+
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
- CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
- assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
- "Expecting function types to be identical");
+ CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
CI.replaceAllUsesWith(Replacement);
- if (isa<FPMathOperator>(Replacement)) {
- // Preserve fast math flags for FP math.
+ // Preserve fast math flags for FP math.
+ if (isa<FPMathOperator>(Replacement))
Replacement->copyFastMathFlags(&CI);
- }
+}
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
- << OldFunc->getName() << "` with call to `" << TLIName
- << "`.\n");
- ++NumCallsReplaced;
- return true;
+/// Utility method to get the VecDesc, depending on whether there is such a TLI
+/// mapping, prioritizing a masked version.
+static std::optional<const VecDesc *> getVecDesc(const TargetLibraryInfo &TLI,
+ const StringRef &ScalarName,
+ const ElementCount &VF) {
+ if (auto *VDNoMask = TLI.getVectorMappingInfo(ScalarName, VF, false))
+ return VDNoMask;
+ if (auto *VDMasked = TLI.getVectorMappingInfo(ScalarName, VF, true))
+ return VDMasked;
+ return std::nullopt;
}
+/// Returns whether it is able to replace a call to the intrinsic \p CI with a
+/// TLI mapped call.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
CallInst &CI) {
- if (!CI.getCalledFunction()) {
+ if (!CI.getCalledFunction())
return false;
- }
auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
- if (IntrinsicID == Intrinsic::not_intrinsic) {
- // Replacement is only performed for intrinsic functions
+ // Replacement is only performed for intrinsic functions.
+ if (IntrinsicID == Intrinsic::not_intrinsic)
return false;
- }
- // Convert vector arguments to scalar type and check that
- // all vector operands have identical vector width.
+ // Convert vector arguments to scalar type and check that all vector operands
+ // have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
- SmallVector<Type *> ScalarTypes;
+ SmallVector<Type *> ScalarArgTypes;
for (auto Arg : enumerate(CI.args())) {
- auto *ArgType = Arg.value()->getType();
- // Vector calls to intrinsics can still have
- // scalar operands for specific arguments.
+ auto *ArgTy = Arg.value()->getType();
if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
- ScalarTypes.push_back(ArgType);
- } else {
- // The argument in this place should be a vector if
- // this is a call to a vector intrinsic.
- auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
- if (!VectorArgTy) {
- // The argument is not a vector, do not perform
- // the replacement.
- return false;
- }
- ElementCount NumElements = VectorArgTy->getElementCount();
- if (NumElements.isScalable()) {
- // The current implementation does not support
- // scalable vectors.
- return false;
- }
- if (VF.isNonZero() && VF != NumElements) {
- // The different arguments differ in vector size.
+ ScalarArgTypes.push_back(ArgTy);
+ } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+ ScalarArgTypes.push_back(ArgTy->getScalarType());
+ // Disallow vector arguments with different VFs. When processing the first
+ // vector argument, store it's VF, and for the rest ensure that they match
+ // it.
+ if (VF.isZero())
+ VF = VectorArgTy->getElementCount();
+ else if (VF != VectorArgTy->getElementCount())
return false;
- } else {
- VF = NumElements;
- }
- ScalarTypes.push_back(VectorArgTy->getElementType());
- }
+ } else
+ // Exit when it is supposed to be a vector argument but it isn't.
+ return false;
}
- // Try to reconstruct the name for the scalar version of this
- // intrinsic using the intrinsic ID and the argument types
- // converted to scalar above.
- std::string ScalarName;
- if (Intrinsic::isOverloaded(IntrinsicID)) {
- ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
- } else {
- ScalarName = Intrinsic::getName(IntrinsicID).str();
- }
+ // Try to reconstruct the name for the scalar version of this intrinsic using
+ // the intrinsic ID and the argument types converted to scalar above.
+ std::string ScalarName =
+ (Intrinsic::isOverloaded(IntrinsicID)
+ ? Intrinsic::getName(IntrinsicID, ScalarArgTypes, CI.getModule())
+ : Intrinsic::getName(IntrinsicID).str());
- if (!TLI.isFunctionVectorizable(ScalarName)) {
- // The TargetLibraryInfo does not contain a vectorized version of
- // the scalar function.
+ // The TargetLibraryInfo does not contain a vectorized version of the scalar
+ // function.
+ if (!TLI.isFunctionVectorizable(ScalarName))
return false;
- }
- // Try to find the mapping for the scalar version of this intrinsic
- // and the exact vector width of the call operands in the
- // TargetLibraryInfo.
- StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
-
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
- << ScalarName << "` and vector width " << VF << ".\n");
-
- if (!TLIName.empty()) {
- // Found the correct mapping in the TargetLibraryInfo,
- // replace the call to the intrinsic with a call to
- // the vector library function.
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
- << "`.\n");
- return replaceWithTLIFunction(CI, TLIName);
- }
+ // Try to find the mapping for the scalar version of this intrinsic and the
+ // exact vector width of the call operands in the TargetLibraryInfo.
+ auto OptVD = getVecDesc(TLI, ScalarName, VF);
----------------
mgabka wrote:
IMO since this is used only once not using a function call would actually be more transparent here, and you wouldn't need to use the optional.
https://github.com/llvm/llvm-project/pull/73642
More information about the llvm-commits
mailing list