[llvm] VectorWiden pass to widen already vectorized instructions (PR #67029)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 4 02:48:25 PDT 2023
================
@@ -0,0 +1,356 @@
+///==--- VectorWiden.cpp - Combining Vector Operations to wider types ----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to widen vector operations to a wider type. It finds
+// operations that are independent of each other and share a certain vector
+// type, in the same bottom-up fashion SLP uses for scalars. It detects
+// consecutive stores that can be combined into a wider vector store, then
+// attempts to construct a vectorizable tree using the use-def chains.
+//
+//==------------------------------------------------------------------------==//
+
+#include "llvm/Transforms/Vectorize/VectorWiden.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vector-widen"
+
+// We consider pairs of independent operations for widening, with the
+// possibility of merging them into one operation, and of widening a store if
+// we later find suitable store instructions. We have to bound the distance
+// between such independent operations, or we might introduce excessive
+// register pressure, etc.
+
+static cl::opt<unsigned>
+    MaxInstDistance("vw-max-instr-distance", cl::init(30), cl::Hidden,
+                    // Note the trailing space before the continuation: the
+                    // two literals are concatenated into one help string.
+                    cl::desc("Maximum distance between instructions to "
+                             "consider to widen"));
+
+namespace {
+/// Pairs up identical, independent vector operations within a basic block and
+/// rewrites each pair as a single operation on a scalable vector type of
+/// twice the width.
+class VectorWiden {
+public:
+  using InstrList = SmallVector<Instruction *, 2>;
+  using ValueList = SmallVector<Value *, 2>;
+  VectorWiden(Function &F, const TargetTransformInfo &TTI, DominatorTree &DT,
+              DependenceInfo &DI, const PostDominatorTree &PDT)
+      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), DI(DI), PDT(PDT) {}
+
+  /// Runs the pass over the function; returns true if any IR was changed.
+  bool run();
+
+private:
+  Function &F;
+  IRBuilder<> Builder;
+  const TargetTransformInfo &TTI;
+  DominatorTree &DT;
+  DependenceInfo &DI;
+  const PostDominatorTree &PDT;
+  // NOTE(review): TLI is not initialized by the constructor — confirm it is
+  // assigned before any use elsewhere in this pass.
+  TargetLibraryInfo *TLI;
+
+  /// Instructions that have been replaced by widened forms. They are only
+  /// recorded here; the actual IR erasure happens later.
+  DenseSet<Instruction *> DeletedInstructions;
+
+  /// Checks if the instruction is marked for deletion.
+  bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
+
+  /// Marks an instruction for deletion; it is removed from the IR later.
+  void eraseInstruction(Instruction *I) { DeletedInstructions.insert(I); }
+
+  /// Scans basic block \p BB for widenable pairs; returns true on change.
+  bool processBB(BasicBlock &BB, LLVMContext &Context);
+
+  /// Checks legality and profitability of widening the pair \p IL and
+  /// performs the rewrite; returns true if the pair was widened.
+  bool widenNode(ArrayRef<Instruction *> IL, LLVMContext &Context);
+
+  /// Emits one double-width FPTrunc replacing the pair \p IL.
+  void widenFPTrunc(ArrayRef<Instruction *> IL);
+
+  /// Emits one double-width Add replacing the pair \p IL. \p Reorder commutes
+  /// each instruction's operands (add is commutative) so the pairs line up.
+  void widenAdd(ArrayRef<Instruction *> IL, bool Reorder);
+
+  /// Queries TTI for the cost of \p Opcode with the given destination and
+  /// source types.
+  InstructionCost getOpCost(unsigned Opcode, Type *To, Type *From,
+                            Instruction *I);
+};
+} // namespace
+
+// Replace the pair of identical FPTrunc instructions (IL[0], IL[1]) with a
+// single FPTrunc on a scalable vector of twice the width: insert the two
+// narrow operands into the low/high halves of a wide vector, truncate once,
+// then extract each original result for its remaining users.
+void VectorWiden::widenFPTrunc(ArrayRef<Instruction *> IL) {
+  Instruction *I = IL[0];
+  Instruction *I1 = IL[1];
+  ScalableVectorType *RetOrigType = cast<ScalableVectorType>(I->getType());
+  ScalableVectorType *OrigType =
+      cast<ScalableVectorType>(I->getOperand(0)->getType());
+  ScalableVectorType *RetType =
+      ScalableVectorType::getDoubleElementsVectorType(RetOrigType);
+  ScalableVectorType *OpType =
+      ScalableVectorType::getDoubleElementsVectorType(OrigType);
+  // Subvector positions are expressed in elements of the narrow type; derive
+  // them from the type instead of hard-coding 4 (which was only correct for
+  // 4-element minimum vectors).
+  uint64_t OpHalfElts = OrigType->getMinNumElements();
+  uint64_t RetHalfElts = RetOrigType->getMinNumElements();
+  // Every lane of the placeholder is overwritten by the two inserts below,
+  // so poison is the preferred placeholder over undef.
+  Value *WideVec = PoisonValue::get(OpType);
+  Builder.SetInsertPoint(I);
+  Function *InsertIntr = llvm::Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::vector_insert, {OpType, OrigType});
+  Value *Insert1 = Builder.CreateCall(
+      InsertIntr, {WideVec, I->getOperand(0), Builder.getInt64(0)});
+  Value *Insert2 = Builder.CreateCall(
+      InsertIntr, {Insert1, I1->getOperand(0), Builder.getInt64(OpHalfElts)});
+  Value *ResFPTrunc =
+      Builder.CreateCast(Instruction::FPTrunc, Insert2, RetType);
+  Function *ExtractIntr = llvm::Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::vector_extract, {RetOrigType, RetType});
+  // I's operand was inserted at index 0, so its result is the low half; I1's
+  // is the high half. (The original extracted the halves swapped relative to
+  // the inserts, handing each instruction the other's result.)
+  if (!I->users().empty()) {
+    Value *Res =
+        Builder.CreateCall(ExtractIntr, {ResFPTrunc, Builder.getInt64(0)});
+    I->replaceAllUsesWith(Res);
+  }
+  if (!I1->users().empty()) {
+    Value *Res2 = Builder.CreateCall(
+        ExtractIntr, {ResFPTrunc, Builder.getInt64(RetHalfElts)});
+    I1->replaceAllUsesWith(Res2);
+  }
+}
+
+// Replace the pair of identical Add instructions (IL[0], IL[1]) with a single
+// Add on a scalable vector of twice the width. \p Reorder commutes each
+// instruction's operands so matching values end up in the same wide operand.
+void VectorWiden::widenAdd(ArrayRef<Instruction *> IL, bool Reorder) {
+  Instruction *I = IL[0];
+  Instruction *I1 = IL[1];
+
+  Value *XHi = I->getOperand(0);
+  Value *XLo = I1->getOperand(0);
+  Value *YHi = I->getOperand(1);
+  Value *YLo = I1->getOperand(1);
+  if (Reorder) {
+    std::swap(XHi, YHi);
+    std::swap(XLo, YLo);
+  }
+
+  ScalableVectorType *RetOrigType = cast<ScalableVectorType>(I->getType());
+  ScalableVectorType *OrigType =
+      cast<ScalableVectorType>(I->getOperand(0)->getType());
+  ScalableVectorType *RetType =
+      ScalableVectorType::getDoubleElementsVectorType(RetOrigType);
+  ScalableVectorType *OpType =
+      ScalableVectorType::getDoubleElementsVectorType(OrigType);
+  // Subvector positions are expressed in elements of the narrow type; derive
+  // them from the type instead of hard-coding 4 (which was only correct for
+  // 4-element minimum vectors).
+  uint64_t OpHalfElts = OrigType->getMinNumElements();
+  uint64_t RetHalfElts = RetOrigType->getMinNumElements();
+  // Every lane of the placeholder is overwritten by the inserts below, so
+  // poison is the preferred placeholder over undef.
+  Value *WideVec = PoisonValue::get(OpType);
+  Builder.SetInsertPoint(I);
+  Function *InsertIntr = llvm::Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::vector_insert, {OpType, OrigType});
+  Value *X1 =
+      Builder.CreateCall(InsertIntr, {WideVec, XHi, Builder.getInt64(0)});
+  Value *X2 =
+      Builder.CreateCall(InsertIntr, {X1, XLo, Builder.getInt64(OpHalfElts)});
+  Value *Y1 =
+      Builder.CreateCall(InsertIntr, {WideVec, YHi, Builder.getInt64(0)});
+  Value *Y2 =
+      Builder.CreateCall(InsertIntr, {Y1, YLo, Builder.getInt64(OpHalfElts)});
+  // Note: nuw/nsw flags from the original adds are intentionally not
+  // propagated here; doing so would only be valid if both adds carry them.
+  Value *ResAdd = Builder.CreateAdd(X2, Y2);
+  Function *ExtractIntr = llvm::Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::vector_extract, {RetOrigType, RetType});
+  // I's operands were inserted at index 0, so its result is the low half;
+  // I1's result is the high half.
+  if (!I->users().empty()) {
+    Value *Res = Builder.CreateCall(ExtractIntr, {ResAdd, Builder.getInt64(0)});
+    I->replaceAllUsesWith(Res);
+  }
+  if (!I1->users().empty()) {
+    Value *Res2 = Builder.CreateCall(
+        ExtractIntr, {ResAdd, Builder.getInt64(RetHalfElts)});
+    I1->replaceAllUsesWith(Res2);
+  }
+}
+
+// Decide whether the pair \p IL can and should be widened, and perform the
+// rewrite if so. Returns true if the pair was widened.
+bool VectorWiden::widenNode(ArrayRef<Instruction *> IL, LLVMContext &Context) {
+  LLVM_DEBUG(dbgs() << "VW: widenNode: " << *IL[0] << " " << *IL[1] << "\n");
+  if (!TTI.considerForWidening(Context, IL))
+    return false;
+  if (IL[0] == IL[1])
+    return false;
+  if (IL[0]->getOpcode() != IL[1]->getOpcode())
+    return false;
+  // Ignore if the two live in different basic blocks.
+  if (IL[0]->getParent() != IL[1]->getParent())
+    return false;
+  // Ignore if the two instructions are too far apart (register pressure).
+  // std::distance on instruction iterators is only valid walking forward, so
+  // order the pair first; the original abs(distance(IL[1], IL[0])) was UB
+  // whenever IL[0] preceded IL[1].
+  Instruction *First = IL[0]->comesBefore(IL[1]) ? IL[0] : IL[1];
+  Instruction *Second = First == IL[0] ? IL[1] : IL[0];
+  if ((unsigned)std::distance(First->getIterator(), Second->getIterator()) >
+      MaxInstDistance)
+    return false;
+  // Bail out if either instruction is already marked as deleted.
+  if (isDeleted(IL[0]) || isDeleted(IL[1]))
+    return false;
+  // The two instructions must not feed each other.
+  if (IL[1] == IL[0]->getOperand(0) || IL[0] == IL[1]->getOperand(0))
+    return false;
+  if (IL[0]->getNumOperands() > 1 &&
+      (IL[1] == IL[0]->getOperand(1) || IL[0] == IL[1]->getOperand(1)))
+    return false;
+  if (!isSafeToMoveBefore(*IL[1], *IL[0], DT, &PDT, &DI))
+    return false;
+  switch (IL[0]->getOpcode()) {
+  case Instruction::FPTrunc: {
+    ScalableVectorType *RetOrigType =
+        cast<ScalableVectorType>(IL[0]->getType());
+    ScalableVectorType *OrigType =
+        cast<ScalableVectorType>(IL[0]->getOperand(0)->getType());
+    InstructionCost Cost =
+        getOpCost(Instruction::FPTrunc, RetOrigType, OrigType, IL[0]);
+    ScalableVectorType *RetType =
+        ScalableVectorType::getDoubleElementsVectorType(RetOrigType);
+    ScalableVectorType *OpType =
+        ScalableVectorType::getDoubleElementsVectorType(OrigType);
+    InstructionCost CostNew =
+        getOpCost(Instruction::FPTrunc, RetType, OpType, IL[0]);
+    // Widen only if one wide op costs no more than the two narrow ops.
+    if (2 * Cost < CostNew)
+      return false;
+    LLVM_DEBUG(dbgs() << "VW: Decided to widen FPTrunc, safe to merge : "
+                      << *IL[0] << " with " << *IL[1] << "\n");
+    widenFPTrunc(IL);
+    return true;
+  }
+  case Instruction::Add: {
+    ScalableVectorType *OrigType =
+        cast<ScalableVectorType>(IL[0]->getOperand(0)->getType());
+    ScalableVectorType *OpType =
+        ScalableVectorType::getDoubleElementsVectorType(OrigType);
+    InstructionCost Cost =
+        getOpCost(Instruction::Add, OrigType, OrigType, IL[0]);
+    InstructionCost CostNew =
+        getOpCost(Instruction::Add, OpType, OpType, IL[0]);
+    // Widen only if one wide op costs no more than the two narrow ops.
+    if (2 * Cost < CostNew)
+      return false;
+    LLVM_DEBUG(dbgs() << "VW: Decided to widen Add, safe to merge : " << *IL[0]
+                      << " with " << *IL[1] << "\n");
+    // If the second operands differ, try commuting (add is commutative).
+    widenAdd(IL, IL[0]->getOperand(1) != IL[1]->getOperand(1));
+    return true;
+  }
+
+  default:
+    break;
+  }
+  return false;
+}
+
+InstructionCost VectorWiden::getOpCost(unsigned Opcode, Type *To, Type *From,
+ Instruction *I) {
+ InstructionCost Cost = 0;
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ switch (Opcode) {
+ case Instruction::FPTrunc: {
----------------
sdesmalen-arm wrote:
Can you structure the code in this pass in such a way that we don't need a switch statement in every function?
https://github.com/llvm/llvm-project/pull/67029
More information about the llvm-commits
mailing list