[llvm] 6ef8299 - Recommit "[VPlan] Replace disjoint or with add instead of dropping disjoint. (#83821)"

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 12:11:43 PDT 2024


Author: Florian Hahn
Date: 2024-03-27T19:11:18Z
New Revision: 6ef829941b38f7e8a28c4cba1ff25cd0ae9f7d3d

URL: https://github.com/llvm/llvm-project/commit/6ef829941b38f7e8a28c4cba1ff25cd0ae9f7d3d
DIFF: https://github.com/llvm/llvm-project/commit/6ef829941b38f7e8a28c4cba1ff25cd0ae9f7d3d.diff

LOG: Recommit "[VPlan] Replace disjoint or with add instead of dropping disjoint. (#83821)"

Recommit with a fix for the use-after-free causing the revert.
This reverts the revert commit f872043e055f4163c3c4b1b86ca0354490174987.

Original commit message:

Dropping disjoint from an OR may yield incorrect results, as some
analysis may have converted it to an Add implicitly (e.g. SCEV used for
dependence analysis). Instead, replace it with an equivalent Add.

This is possible as all users of the disjoint OR only access lanes where
the operands are disjoint or poison otherwise.

Note that replacing all disjoint ORs with ADDs instead of dropping the
flags is not strictly necessary. It is only needed for disjoint ORs that
SCEV treated as ADDs, but those are not tracked.

There are other places that may drop poison-generating flags; those
likely need similar treatment.

Fixes https://github.com/llvm/llvm-project/issues/81872

PR: https://github.com/llvm/llvm-project/pull/83821

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
    llvm/lib/Transforms/Vectorize/VPlan.h
    llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/test/Transforms/LoopVectorize/X86/pr81872.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index e86705e8988904..5d03b66b0ce335 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -68,6 +68,9 @@ class VPBuilder {
 public:
   VPBuilder() = default;
   VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
+  VPBuilder(VPRecipeBase *InsertPt) {
+    setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
+  }
 
   /// Clear the insertion point: created instructions will not be inserted into
   /// a block.

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f70ccf8270b214..2f56799a7b5186 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
     return WrapFlags.HasNSW;
   }
 
+  bool isDisjoint() const {
+    assert(OpType == OperationType::DisjointOp &&
+           "recipe cannot have a disjoing flag");
+    return DisjointFlags.IsDisjoint;
+  }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void printFlags(raw_ostream &O) const;
 #endif

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index aa253590694514..a03a408686ef13 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
   return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
 }
 
+template <typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
+m_Or(const Op0_t &Op0, const Op1_t &Op1) {
+  return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
+}
 } // namespace VPlanPatternMatch
 } // namespace llvm
 

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e6ab27a918bcec..006f4349d6fb91 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1255,7 +1255,24 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
       // load/store. If the underlying instruction has poison-generating flags,
       // drop them directly.
       if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
-        RecWithFlags->dropPoisonGeneratingFlags();
+        VPValue *A, *B;
+        using namespace llvm::VPlanPatternMatch;
+        // Dropping disjoint from an OR may yield incorrect results, as some
+        // analysis may have converted it to an Add implicitly (e.g. SCEV used
+        // for dependence analysis). Instead, replace it with an equivalent Add.
+        // This is possible as all users of the disjoint OR only access lanes
+        // where the operands are disjoint or poison otherwise.
+        if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
+            RecWithFlags->isDisjoint()) {
+          VPBuilder Builder(RecWithFlags);
+          VPInstruction *New = Builder.createOverflowingOp(
+              Instruction::Add, {A, B}, {false, false},
+              RecWithFlags->getDebugLoc());
+          RecWithFlags->replaceAllUsesWith(New);
+          RecWithFlags->eraseFromParent();
+          CurRec = New;
+        } else
+          RecWithFlags->dropPoisonGeneratingFlags();
       } else {
         Instruction *Instr = dyn_cast_or_null<Instruction>(
             CurRec->getVPSingleValue()->getUnderlyingValue());

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index 14acb6f57aa0c9..3f38abc75a5837 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3


        


More information about the llvm-commits mailing list