[llvm] ef89e3e - [VPlan] Collect ephemeral values for VPlan.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 9 13:35:06 PDT 2024


Author: Florian Hahn
Date: 2024-07-09T21:34:49+01:00
New Revision: ef89e3efa99ddb7d0dd2bd7ab0809a51ac685a4f

URL: https://github.com/llvm/llvm-project/commit/ef89e3efa99ddb7d0dd2bd7ab0809a51ac685a4f
DIFF: https://github.com/llvm/llvm-project/commit/ef89e3efa99ddb7d0dd2bd7ab0809a51ac685a4f.diff

LOG: [VPlan] Collect ephemeral values for VPlan.

Port collectEphemeralValues to VPlan as collectEphemeralRecipesForVPlan,
use it in willGenerateVectors. This fixes a regression caused by
29b8b72117 for loops where the only vector values are ephemeral.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
    llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
    llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 631f89b1225cb..9749f3db76489 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4790,11 +4790,15 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
   assert(VF.isVector() && "Checking a scalar VF?");
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
                           Plan.getCanonicalIV()->getScalarType()->getContext());
+  DenseSet<VPRecipeBase *> EphemeralRecipes;
+  collectEphemeralRecipesForVPlan(Plan, EphemeralRecipes);
   // Set of already visited types.
   DenseSet<Type *> Visited;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
     for (VPRecipeBase &R : *VPBB) {
+      if (EphemeralRecipes.contains(&R))
+        continue;
       // Continue early if the recipe is considered to not produce a vector
       //  result. Note that this includes VPInstruction where some opcodes may
       // produce a vector, to preserve existing behavior as VPInstructions model

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c4b096d653158..36d52b255232a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -8,8 +8,10 @@
 
 #include "VPlanAnalysis.h"
 #include "VPlan.h"
+#include "VPlanCFG.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/PatternMatch.h"
 
 using namespace llvm;
 
@@ -278,3 +280,39 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
   CachedTypes[V] = ResultTy;
   return ResultTy;
 }
+
+void llvm::collectEphemeralRecipesForVPlan(
+    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
+  // First, collect seed recipes which are operands of assumes.
+  SmallVector<VPRecipeBase *> Worklist;
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+      if (!RepR || !match(RepR->getUnderlyingInstr(),
+                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
+        continue;
+      Worklist.push_back(RepR);
+      EphRecipes.insert(RepR);
+    }
+  }
+
+  // Process operands of candidates in worklist and add them to the set of
+  // ephemeral recipes, if they don't have side-effects and are only used by
+  // other ephemeral recipes.
+  while (!Worklist.empty()) {
+    VPRecipeBase *Cur = Worklist.pop_back_val();
+    for (VPValue *Op : Cur->operands()) {
+      auto *OpR = Op->getDefiningRecipe();
+      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
+        continue;
+      if (any_of(Op->users(), [EphRecipes](VPUser *U) {
+            auto *UR = dyn_cast<VPRecipeBase>(U);
+            return !UR || !EphRecipes.contains(UR);
+          }))
+        continue;
+      EphRecipes.insert(OpR);
+      Worklist.push_back(OpR);
+    }
+  }
+}

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index 7d310b1b31b6f..438364efc6294 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -10,6 +10,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 
 namespace llvm {
 
@@ -23,6 +24,8 @@ class VPWidenIntOrFpInductionRecipe;
 class VPWidenMemoryRecipe;
 struct VPWidenSelectRecipe;
 class VPReplicateRecipe;
+class VPRecipeBase;
+class VPlan;
 class Type;
 
 /// An analysis for type-inference for VPValues.
@@ -61,6 +64,9 @@ class VPTypeAnalysis {
   LLVMContext &getContext() { return Ctx; }
 };
 
+// Collect a VPlan's ephemeral recipes (those used only by an assume).
+void collectEphemeralRecipesForVPlan(VPlan &Plan,
+                                     DenseSet<VPRecipeBase *> &EphRecipes);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll
index 5b250c33a45fb..450caccefb758 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll
@@ -335,90 +335,22 @@ exit:
   ret i32 %final.load
 }
 
-; FIXME: shouldn't be vectorized, as the only vector values generated are
-; ephemeral.
 define i32 @ephemeral_load_and_compare_another_load_used_outside(ptr %start, ptr %end) #0 {
 ; CHECK-LABEL: define i32 @ephemeral_load_and_compare_another_load_used_outside(
 ; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[END2:%.*]] = ptrtoint ptr [[END]] to i64
-; CHECK-NEXT:    [[START1:%.*]] = ptrtoint ptr [[START]] to i64
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[START1]], [[END2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], -8
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
-; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[END]], align 8
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP4]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER11:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER8]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER11]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP9]])
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP10]])
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP12]])
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP13]])
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP14]])
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP15]])
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP16]])
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP17]])
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP18]])
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP19]])
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP20]])
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP21]])
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP22]])
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
-; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP24]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT2]] = getelementptr nusw i8, ptr [[IV]], i64 -8
 ; CHECK-NEXT:    [[L1:%.*]] = load ptr, ptr [[END]], align 8
 ; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr [[L1]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[L2]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]]
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[FINAL_LOAD:%.*]] = load i32, ptr [[L1_LCSSA]], align 4
 ; CHECK-NEXT:    ret i32 [[FINAL_LOAD]]
 ;
@@ -448,6 +380,4 @@ attributes #0 = { "target-cpu"="skylake-avx512" }
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
 ;.


        


More information about the llvm-commits mailing list