[llvm] [LV] Emit all remarks for unvectorizable instructions (PR #153833)
Tobias Stadler via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 09:58:32 PDT 2025
https://github.com/tobias-stadler created https://github.com/llvm/llvm-project/pull/153833
If ExtraAnalysis is requested, emit all remarks caused by unvectorizable instructions - instead of only the first.
This causes a lot of indent churn, so the PR is split into 2 commits, the first contains the actual changes, the second one is just formatting.
>From d6917ecfd3ccaeed81c8e370169a73bfcf62c9c8 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Fri, 15 Aug 2025 17:22:00 +0100
Subject: [PATCH 1/2] [LV] Emit remarks for all unvectorizable instructions
---
.../Vectorize/LoopVectorizationLegality.h | 3 +
.../Vectorize/LoopVectorizationLegality.cpp | 74 +++++++++++--------
.../X86/vectorization-remarks-missed.ll | 36 +++++++++
3 files changed, 82 insertions(+), 31 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 43ff084816d18..48ee93acbe008 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -493,6 +493,9 @@ class LoopVectorizationLegality {
/// and we only need to check individual instructions.
bool canVectorizeInstrs();
+ /// Check if an individual instruction is vectorizable.
+ bool canVectorizeInstr(Instruction &I);
+
/// When we vectorize loops we may change the order in which
/// we read and write from memory. This method checks if it is
/// legal to vectorize the code, considering only memory constrains.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c47fd9421fddd..63f9dafaf5454 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -793,12 +793,48 @@ static bool canWidenCallReturnType(Type *Ty) {
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
- BasicBlock *Header = TheLoop->getHeader();
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ bool Result = true;
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
for (Instruction &I : *BB) {
+ Result &= canVectorizeInstr(I);
+ if (!DoExtraAnalysis && !Result)
+ return false;
+ }
+ }
+
+ if (!PrimaryInduction) {
+ if (Inductions.empty()) {
+ reportVectorizationFailure("Did not find one integer induction var",
+ "loop induction variable could not be identified",
+ "NoInductionVariable", ORE, TheLoop);
+ return false;
+ }
+ if (!WidestIndTy) {
+ reportVectorizationFailure("Did not find one integer induction var",
+ "integer loop induction variable could not be identified",
+ "NoIntegerInductionVariable", ORE, TheLoop);
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ }
+
+ // Now we know the widest induction type, check if our found induction
+ // is the same size. If it's not, unset it here and InnerLoopVectorizer
+ // will create another.
+ if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
+ PrimaryInduction = nullptr;
+
+ return Result;
+}
+
+bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ BasicBlock *Header = TheLoop->getHeader();
+
if (auto *Phi = dyn_cast<PHINode>(&I)) {
Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
@@ -820,7 +856,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// legalization for reduction, induction and fixed order
// recurrences.
AllowedExit.insert(&I);
- continue;
+ return true;
}
// We only allow if-converted PHIs with exactly two incoming values.
@@ -837,7 +873,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
AllowedExit.insert(RedDes.getLoopExitInstr());
Reductions[Phi] = RedDes;
- continue;
+ return true;
}
// We prevent matching non-constant strided pointer IVS to preserve
@@ -871,13 +907,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!IsDisallowedStridedPointerInduction(ID)) {
addInductionPhi(Phi, ID, AllowedExit);
Requirements->addExactFPMathInst(ID.getExactFPMathInst());
- continue;
+ return true;
}
if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
- continue;
+ return true;
}
// As a last resort, coerce the PHI to a AddRec expression
@@ -885,7 +921,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
!IsDisallowedStridedPointerInduction(ID)) {
addInductionPhi(Phi, ID, AllowedExit);
- continue;
+ return true;
}
reportVectorizationFailure("Found an unidentified PHI",
@@ -1036,36 +1072,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// outside the loop.
if (PSE.getPredicate().isAlwaysTrue()) {
AllowedExit.insert(&I);
- continue;
+ return true;
}
reportVectorizationFailure("Value cannot be used outside the loop",
"ValueUsedOutsideLoop", ORE, TheLoop, &I);
return false;
}
- } // next instr.
- }
-
- if (!PrimaryInduction) {
- if (Inductions.empty()) {
- reportVectorizationFailure("Did not find one integer induction var",
- "loop induction variable could not be identified",
- "NoInductionVariable", ORE, TheLoop);
- return false;
- }
- if (!WidestIndTy) {
- reportVectorizationFailure("Did not find one integer induction var",
- "integer loop induction variable could not be identified",
- "NoIntegerInductionVariable", ORE, TheLoop);
- return false;
- }
- LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- }
-
- // Now we know the widest induction type, check if our found induction
- // is the same size. If it's not, unset it here and InnerLoopVectorizer
- // will create another.
- if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
- PrimaryInduction = nullptr;
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 70134fa6bc78d..5ec093c5af6ba 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -117,6 +117,33 @@
; YAML-NEXT: ...
; YAML-NEXT: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NonReductionValueUsedOutsideLoop
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: value that could not be identified as reduction is used outside the loop
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: CantVectorizeLibcall
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 29, Column: 11 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: call instruction cannot be vectorized
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NoInductionVariable
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: loop induction variable could not be identified
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
; YAML-NEXT: Name: UnsupportedUncountableLoop
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
; YAML-NEXT: Function: test_multiple_failures
@@ -124,6 +151,15 @@
; YAML-NEXT: - String: 'loop not vectorized: '
; YAML-NEXT: - String: Cannot vectorize uncountable loop
; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: ...
; YAML: --- !Missed
; YAML-NEXT: Pass: loop-vectorize
; YAML-NEXT: Name: MissedDetails
>From cd0d8c61e1ec7ddfe85fa7bcb0ed2b727f4f767f Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Fri, 15 Aug 2025 17:23:25 +0100
Subject: [PATCH 2/2] format
---
.../Vectorize/LoopVectorizationLegality.cpp | 458 +++++++++---------
1 file changed, 231 insertions(+), 227 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 63f9dafaf5454..789047a2a28e7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -808,13 +808,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PrimaryInduction) {
if (Inductions.empty()) {
- reportVectorizationFailure("Did not find one integer induction var",
+ reportVectorizationFailure(
+ "Did not find one integer induction var",
"loop induction variable could not be identified",
"NoInductionVariable", ORE, TheLoop);
return false;
}
if (!WidestIndTy) {
- reportVectorizationFailure("Did not find one integer induction var",
+ reportVectorizationFailure(
+ "Did not find one integer induction var",
"integer loop induction variable could not be identified",
"NoIntegerInductionVariable", ORE, TheLoop);
return false;
@@ -835,249 +837,251 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
BasicBlock *BB = I.getParent();
BasicBlock *Header = TheLoop->getHeader();
- if (auto *Phi = dyn_cast<PHINode>(&I)) {
- Type *PhiTy = Phi->getType();
- // Check that this PHI type is allowed.
- if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
- !PhiTy->isPointerTy()) {
- reportVectorizationFailure("Found a non-int non-pointer PHI",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
- return false;
- }
+ if (auto *Phi = dyn_cast<PHINode>(&I)) {
+ Type *PhiTy = Phi->getType();
+ // Check that this PHI type is allowed.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
+ reportVectorizationFailure(
+ "Found a non-int non-pointer PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", ORE, TheLoop);
+ return false;
+ }
- // If this PHINode is not in the header block, then we know that we
- // can convert it to select during if-conversion. No need to check if
- // the PHIs in this block are induction or reduction variables.
- if (BB != Header) {
- // Non-header phi nodes that have outside uses can be vectorized. Add
- // them to the list of allowed exits.
- // Unsafe cyclic dependencies with header phis are identified during
- // legalization for reduction, induction and fixed order
- // recurrences.
- AllowedExit.insert(&I);
- return true;
- }
+ // If this PHINode is not in the header block, then we know that we
+ // can convert it to select during if-conversion. No need to check if
+ // the PHIs in this block are induction or reduction variables.
+ if (BB != Header) {
+ // Non-header phi nodes that have outside uses can be vectorized. Add
+ // them to the list of allowed exits.
+ // Unsafe cyclic dependencies with header phis are identified during
+ // legalization for reduction, induction and fixed order
+ // recurrences.
+ AllowedExit.insert(&I);
+ return true;
+ }
- // We only allow if-converted PHIs with exactly two incoming values.
- if (Phi->getNumIncomingValues() != 2) {
- reportVectorizationFailure("Found an invalid PHI",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop, Phi);
- return false;
- }
+ // We only allow if-converted PHIs with exactly two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ reportVectorizationFailure(
+ "Found an invalid PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", ORE, TheLoop, Phi);
+ return false;
+ }
- RecurrenceDescriptor RedDes;
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
- DT, PSE.getSE())) {
- Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
- AllowedExit.insert(RedDes.getLoopExitInstr());
- Reductions[Phi] = RedDes;
- return true;
- }
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
+ PSE.getSE())) {
+ Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
+ return true;
+ }
- // We prevent matching non-constant strided pointer IVS to preserve
- // historical vectorizer behavior after a generalization of the
- // IVDescriptor code. The intent is to remove this check, but we
- // have to fix issues around code quality for such loops first.
- auto IsDisallowedStridedPointerInduction =
- [](const InductionDescriptor &ID) {
- if (AllowStridedPointerIVs)
- return false;
- return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
- ID.getConstIntStepValue() == nullptr;
- };
-
- // TODO: Instead of recording the AllowedExit, it would be good to
- // record the complementary set: NotAllowedExit. These include (but may
- // not be limited to):
- // 1. Reduction phis as they represent the one-before-last value, which
- // is not available when vectorized
- // 2. Induction phis and increment when SCEV predicates cannot be used
- // outside the loop - see addInductionPhi
- // 3. Non-Phis with outside uses when SCEV predicates cannot be used
- // outside the loop - see call to hasOutsideLoopUser in the non-phi
- // handling below
- // 4. FixedOrderRecurrence phis that can possibly be handled by
- // extraction.
- // By recording these, we can then reason about ways to vectorize each
- // of these NotAllowedExit.
- InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
- !IsDisallowedStridedPointerInduction(ID)) {
- addInductionPhi(Phi, ID, AllowedExit);
- Requirements->addExactFPMathInst(ID.getExactFPMathInst());
- return true;
- }
+ // We prevent matching non-constant strided pointer IVS to preserve
+ // historical vectorizer behavior after a generalization of the
+ // IVDescriptor code. The intent is to remove this check, but we
+ // have to fix issues around code quality for such loops first.
+ auto IsDisallowedStridedPointerInduction =
+ [](const InductionDescriptor &ID) {
+ if (AllowStridedPointerIVs)
+ return false;
+ return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+ ID.getConstIntStepValue() == nullptr;
+ };
+
+ // TODO: Instead of recording the AllowedExit, it would be good to
+ // record the complementary set: NotAllowedExit. These include (but may
+ // not be limited to):
+ // 1. Reduction phis as they represent the one-before-last value, which
+ // is not available when vectorized
+ // 2. Induction phis and increment when SCEV predicates cannot be used
+ // outside the loop - see addInductionPhi
+ // 3. Non-Phis with outside uses when SCEV predicates cannot be used
+ // outside the loop - see call to hasOutsideLoopUser in the non-phi
+ // handling below
+ // 4. FixedOrderRecurrence phis that can possibly be handled by
+ // extraction.
+ // By recording these, we can then reason about ways to vectorize each
+ // of these NotAllowedExit.
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
+ !IsDisallowedStridedPointerInduction(ID)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ Requirements->addExactFPMathInst(ID.getExactFPMathInst());
+ return true;
+ }
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
- AllowedExit.insert(Phi);
- FixedOrderRecurrences.insert(Phi);
- return true;
- }
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
+ AllowedExit.insert(Phi);
+ FixedOrderRecurrences.insert(Phi);
+ return true;
+ }
- // As a last resort, coerce the PHI to a AddRec expression
- // and re-try classifying it a an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
- !IsDisallowedStridedPointerInduction(ID)) {
- addInductionPhi(Phi, ID, AllowedExit);
- return true;
- }
+ // As a last resort, coerce the PHI to a AddRec expression
+ // and re-try classifying it a an induction PHI.
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
+ !IsDisallowedStridedPointerInduction(ID)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ return true;
+ }
- reportVectorizationFailure("Found an unidentified PHI",
- "value that could not be identified as "
- "reduction is used outside the loop",
- "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
- return false;
- } // end of PHI handling
-
- // We handle calls that:
- // * Have a mapping to an IR intrinsic.
- // * Have a vector version available.
- auto *CI = dyn_cast<CallInst>(&I);
-
- if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
- !(CI->getCalledFunction() && TLI &&
- (!VFDatabase::getMappings(*CI).empty() ||
- isTLIScalarize(*TLI, *CI)))) {
- // If the call is a recognized math libary call, it is likely that
- // we can vectorize it given loosened floating-point constraints.
- LibFunc Func;
- bool IsMathLibCall =
- TLI && CI->getCalledFunction() &&
- CI->getType()->isFloatingPointTy() &&
- TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
- TLI->hasOptimizedCodeGen(Func);
-
- if (IsMathLibCall) {
- // TODO: Ideally, we should not use clang-specific language here,
- // but it's hard to provide meaningful yet generic advice.
- // Also, should this be guarded by allowExtraAnalysis() and/or be part
- // of the returned info from isFunctionVectorizable()?
+ reportVectorizationFailure("Found an unidentified PHI",
+ "value that could not be identified as "
+ "reduction is used outside the loop",
+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
+ Phi);
+ return false;
+ } // end of PHI handling
+
+ // We handle calls that:
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
+ auto *CI = dyn_cast<CallInst>(&I);
+
+ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
+ !(CI->getCalledFunction() && TLI &&
+ (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
+ // If the call is a recognized math libary call, it is likely that
+ // we can vectorize it given loosened floating-point constraints.
+ LibFunc Func;
+ bool IsMathLibCall =
+ TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->hasOptimizedCodeGen(Func);
+
+ if (IsMathLibCall) {
+ // TODO: Ideally, we should not use clang-specific language here,
+ // but it's hard to provide meaningful yet generic advice.
+ // Also, should this be guarded by allowExtraAnalysis() and/or be part
+ // of the returned info from isFunctionVectorizable()?
+ reportVectorizationFailure(
+ "Found a non-intrinsic callsite",
+ "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags",
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
+ } else {
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "call instruction cannot be vectorized",
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
+ }
+ return false;
+ }
+
+ // Some intrinsics have scalar arguments and should be same in order for
+ // them to be vectorized (i.e. loop invariant).
+ if (CI) {
+ auto *SE = PSE.getSE();
+ Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+ for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
reportVectorizationFailure(
- "Found a non-intrinsic callsite",
- "library call cannot be vectorized. "
- "Try compiling with -fno-math-errno, -ffast-math, "
- "or similar flags",
- "CantVectorizeLibcall", ORE, TheLoop, CI);
- } else {
- reportVectorizationFailure("Found a non-intrinsic callsite",
- "call instruction cannot be vectorized",
- "CantVectorizeLibcall", ORE, TheLoop, CI);
+ "Found unvectorizable intrinsic",
+ "intrinsic instruction cannot be vectorized",
+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);
+ return false;
}
- return false;
- }
-
- // Some intrinsics have scalar arguments and should be same in order for
- // them to be vectorized (i.e. loop invariant).
- if (CI) {
- auto *SE = PSE.getSE();
- Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
- if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
- if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
- TheLoop)) {
- reportVectorizationFailure("Found unvectorizable intrinsic",
- "intrinsic instruction cannot be vectorized",
- "CantVectorizeIntrinsic", ORE, TheLoop, CI);
- return false;
- }
- }
- }
-
- // If we found a vectorized variant of a function, note that so LV can
- // make better decisions about maximum VF.
- if (CI && !VFDatabase::getMappings(*CI).empty())
- VecCallVariantsFound = true;
-
- auto CanWidenInstructionTy = [](Instruction const &Inst) {
- Type *InstTy = Inst.getType();
- if (!isa<StructType>(InstTy))
- return canVectorizeTy(InstTy);
-
- // For now, we only recognize struct values returned from calls where
- // all users are extractvalue as vectorizable. All element types of the
- // struct must be types that can be widened.
- return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
- all_of(Inst.users(), IsaPred<ExtractValueInst>);
- };
-
- // Check that the instruction return type is vectorizable.
- // We can't vectorize casts from vector type to scalar type.
- // Also, we can't vectorize extractelement instructions.
- if (!CanWidenInstructionTy(I) ||
- (isa<CastInst>(I) &&
- !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
- isa<ExtractElementInst>(I)) {
- reportVectorizationFailure("Found unvectorizable type",
- "instruction return type cannot be vectorized",
- "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
- return false;
}
+ }
- // Check that the stored type is vectorizable.
- if (auto *ST = dyn_cast<StoreInst>(&I)) {
- Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T)) {
- reportVectorizationFailure("Store instruction cannot be vectorized",
- "CantVectorizeStore", ORE, TheLoop, ST);
- return false;
- }
+ // If we found a vectorized variant of a function, note that so LV can
+ // make better decisions about maximum VF.
+ if (CI && !VFDatabase::getMappings(*CI).empty())
+ VecCallVariantsFound = true;
+
+ auto CanWidenInstructionTy = [](Instruction const &Inst) {
+ Type *InstTy = Inst.getType();
+ if (!isa<StructType>(InstTy))
+ return canVectorizeTy(InstTy);
+
+ // For now, we only recognize struct values returned from calls where
+ // all users are extractvalue as vectorizable. All element types of the
+ // struct must be types that can be widened.
+ return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
+ all_of(Inst.users(), IsaPred<ExtractValueInst>);
+ };
- // For nontemporal stores, check that a nontemporal vector version is
- // supported on the target.
- if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
- // Arbitrarily try a vector of 2 elements.
- auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
- assert(VecTy && "did not find vectorized version of stored type");
- if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
- reportVectorizationFailure(
- "nontemporal store instruction cannot be vectorized",
- "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
- return false;
- }
- }
+ // Check that the instruction return type is vectorizable.
+ // We can't vectorize casts from vector type to scalar type.
+ // Also, we can't vectorize extractelement instructions.
+ if (!CanWidenInstructionTy(I) ||
+ (isa<CastInst>(I) &&
+ !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
+ isa<ExtractElementInst>(I)) {
+ reportVectorizationFailure("Found unvectorizable type",
+ "instruction return type cannot be vectorized",
+ "CantVectorizeInstructionReturnType", ORE,
+ TheLoop, &I);
+ return false;
+ }
- } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
- if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
- // For nontemporal loads, check that a nontemporal vector version is
- // supported on the target (arbitrarily try a vector of 2 elements).
- auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
- assert(VecTy && "did not find vectorized version of load type");
- if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
- reportVectorizationFailure(
- "nontemporal load instruction cannot be vectorized",
- "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
- return false;
- }
- }
+ // Check that the stored type is vectorizable.
+ if (auto *ST = dyn_cast<StoreInst>(&I)) {
+ Type *T = ST->getValueOperand()->getType();
+ if (!VectorType::isValidElementType(T)) {
+ reportVectorizationFailure("Store instruction cannot be vectorized",
+ "CantVectorizeStore", ORE, TheLoop, ST);
+ return false;
+ }
- // FP instructions can allow unsafe algebra, thus vectorizable by
- // non-IEEE-754 compliant SIMD units.
- // This applies to floating-point math operations and calls, not memory
- // operations, shuffles, or casts, as they don't change precision or
- // semantics.
- } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
- !I.isFast()) {
- LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
- Hints->setPotentiallyUnsafe();
+ // For nontemporal stores, check that a nontemporal vector version is
+ // supported on the target.
+ if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
+ // Arbitrarily try a vector of 2 elements.
+ auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
+ assert(VecTy && "did not find vectorized version of stored type");
+ if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
+ reportVectorizationFailure(
+ "nontemporal store instruction cannot be vectorized",
+ "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
+ return false;
}
+ }
- // Reduction instructions are allowed to have exit users.
- // All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
- // We can safely vectorize loops where instructions within the loop are
- // used outside the loop only if the SCEV predicates within the loop is
- // same as outside the loop. Allowing the exit means reusing the SCEV
- // outside the loop.
- if (PSE.getPredicate().isAlwaysTrue()) {
- AllowedExit.insert(&I);
- return true;
- }
- reportVectorizationFailure("Value cannot be used outside the loop",
- "ValueUsedOutsideLoop", ORE, TheLoop, &I);
+ } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
+ // For nontemporal loads, check that a nontemporal vector version is
+ // supported on the target (arbitrarily try a vector of 2 elements).
+ auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
+ assert(VecTy && "did not find vectorized version of load type");
+ if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
+ reportVectorizationFailure(
+ "nontemporal load instruction cannot be vectorized",
+ "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
return false;
}
+ }
+
+ // FP instructions can allow unsafe algebra, thus vectorizable by
+ // non-IEEE-754 compliant SIMD units.
+ // This applies to floating-point math operations and calls, not memory
+ // operations, shuffles, or casts, as they don't change precision or
+ // semantics.
+ } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
+ !I.isFast()) {
+ LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
+ Hints->setPotentiallyUnsafe();
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
+ // We can safely vectorize loops where instructions within the loop are
+ // used outside the loop only if the SCEV predicates within the loop is
+ // same as outside the loop. Allowing the exit means reusing the SCEV
+ // outside the loop.
+ if (PSE.getPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(&I);
+ return true;
+ }
+ reportVectorizationFailure("Value cannot be used outside the loop",
+ "ValueUsedOutsideLoop", ORE, TheLoop, &I);
+ return false;
+ }
return true;
}
More information about the llvm-commits
mailing list