[llvm] [LV] Move check if any vector insts will be generated to VPlan. (PR #96622)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 7 04:31:44 PDT 2024


================
@@ -4795,9 +4783,102 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
   } while (!Tail.empty());
 }
 
+/// Check if any recipe of \p Plan will generate a vector value, which will be
+/// assigned a vector register.
+static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
+                                const TargetTransformInfo &TTI) {
+  assert(VF.isVector() && "Checking a scalar VF?");
+  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
+                          Plan.getCanonicalIV()->getScalarType()->getContext());
+  DenseMap<Type *, bool> GeneratesVector;
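+  // Cache the decision per scalar type, since many recipes share the same
+  // inferred scalar type.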
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      // Continue early if the recipe is considered to not produce a vector
+      // result. Note that this includes VPInstruction, where some opcodes may
+      // produce a vector, to preserve existing behavior, as VPInstructions
+      // model aspects not directly mapped to existing IR instructions.
+      switch (R.getVPDefID()) {
+      case VPDef::VPDerivedIVSC:
+      case VPDef::VPScalarIVStepsSC:
+      case VPDef::VPScalarCastSC:
+      case VPDef::VPReplicateSC:
+      case VPDef::VPInstructionSC:
+      case VPDef::VPCanonicalIVPHISC:
+      case VPDef::VPVectorPointerSC:
+      case VPDef::VPExpandSCEVSC:
+      case VPDef::VPEVLBasedIVPHISC:
+      case VPDef::VPPredInstPHISC:
+      case VPDef::VPBranchOnMaskSC:
+        continue;
+      case VPDef::VPReductionSC:
+      case VPDef::VPActiveLaneMaskPHISC:
+      case VPDef::VPWidenCallSC:
+      case VPDef::VPWidenCanonicalIVSC:
+      case VPDef::VPWidenCastSC:
+      case VPDef::VPWidenGEPSC:
+      case VPDef::VPWidenSC:
+      case VPDef::VPWidenSelectSC:
+      case VPDef::VPBlendSC:
+      case VPDef::VPFirstOrderRecurrencePHISC:
+      case VPDef::VPWidenPHISC:
+      case VPDef::VPWidenIntOrFpInductionSC:
+      case VPDef::VPWidenPointerInductionSC:
+      case VPDef::VPReductionPHISC:
+      case VPDef::VPInterleaveSC:
+      case VPDef::VPWidenLoadEVLSC:
+      case VPDef::VPWidenLoadSC:
+      case VPDef::VPWidenStoreEVLSC:
+      case VPDef::VPWidenStoreSC:
+        break;
+      default:
+        llvm_unreachable("unhandled recipe");
+      }
+
+      auto WillWiden = [&TypeInfo, &TTI, &GeneratesVector, VF](VPValue *VPV) {
+        Type *ScalarTy = TypeInfo.inferScalarType(VPV);
+        const auto &[Iter, Ins] = GeneratesVector.insert({ScalarTy, false});
+        if (Ins) {
+          Type *VectorTy = ToVectorTy(ScalarTy, VF);
+          unsigned NumLegalParts = TTI.getNumberOfParts(VectorTy);
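+          // getNumberOfParts returns 0 when the type's legalization cost is
+          // invalid, e.g. a scalable vector type the target does not support;
+          // such a value will not be assigned a vector register.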
+          if (!NumLegalParts)
+            return false;
+          if (VF.isScalable()) {
+            // <vscale x 1 x iN> is assumed to be profitable over iN because
+            // scalable registers are a distinct register class from scalar
+            // ones. If we ever find a target which wants to lower scalable
+            // vectors back to scalars, we'll need to update this code to
+            // explicitly ask TTI about the register class uses for each part.
+            Iter->second = NumLegalParts <= VF.getKnownMinValue();
+          } else {
+            // Fewer parts than lanes means at least two elements share a
+            // register, i.e. the value is genuinely vectorized.
+            Iter->second = NumLegalParts < VF.getKnownMinValue();
+          }
+        }
+        return Iter->second;
+      };
+      if (R.getNumDefinedValues() >= 1) {
+        // For multi-def recipes (currently only interleaved loads), it
+        // suffices to check the first defined value only.
+        if (WillWiden(R.getVPValue(0)))
+          return true;
+      } else if (isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe,
+                     VPInterleaveRecipe>(&R) &&
+                 WillWiden(R.getOperand(1))) {
+        // For stores, check their stored value; for interleaved stores, it
+        // suffices to check the first stored value only. In all cases this is
+        // the second operand.
+        return true;
+      }
----------------
fhahn wrote:

Adjusted, thanks!

https://github.com/llvm/llvm-project/pull/96622
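
For readers skimming the hunk, here is a minimal standalone sketch of the
fixed-width part-counting check in the WillWiden lambda; the 128-bit register
width and the numberOfParts helper are illustrative assumptions, not part of
the patch:

// Illustrative sketch only: mirrors the fixed-width branch of WillWiden.
// Assumes a hypothetical target with 128-bit vector registers; the numbers
// are made up for the example and are not taken from the patch.
#include <cassert>

// Stand-in for TTI.getNumberOfParts(ToVectorTy(ScalarTy, VF)): how many
// 128-bit registers are needed to hold VF elements of ScalarBits each.
static unsigned numberOfParts(unsigned ScalarBits, unsigned VF) {
  return (ScalarBits * VF + 127) / 128; // round up to whole registers
}

int main() {
  // <4 x i32> fits in one register: 1 part < 4 lanes, so lanes share a
  // register and the value counts as widened.
  assert(numberOfParts(32, 4) < 4);
  // <4 x i64> splits into two registers: 2 parts < 4 lanes, still widened.
  assert(numberOfParts(64, 4) < 4);
  // <4 x i128> needs one register per element: 4 parts == 4 lanes, so the
  // NumLegalParts < VF.getKnownMinValue() test fails and the value is
  // treated as effectively scalarized.
  assert(!(numberOfParts(128, 4) < 4));
  return 0;
}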

