[llvm] f225471 - [LSV] Fix the ContextInst for computeKnownBits.

Sun May 28 08:03:49 PDT 2023

Author: Justin Lebar
Date: 2023-05-28T08:00:52-07:00
New Revision: f225471c68881d31835a06c6b2f2b40bdaa287d5

URL: https://github.com/llvm/llvm-project/commit/f225471c68881d31835a06c6b2f2b40bdaa287d5
DIFF: https://github.com/llvm/llvm-project/commit/f225471c68881d31835a06c6b2f2b40bdaa287d5.diff

LOG: [LSV] Fix the ContextInst for computeKnownBits.

Previously we used the later of GEPA or GEPB.  This is hacky because
really we should be using the later of the two load/store instructions
being considered.  But also it's flat-out incorrect, because GEPA and
GEPB might be in different BBs, in which case we cannot ask which one
comes last (assertion failure,
https://reviews.llvm.org/D149893#4378332).

Fixed, now we use the correct context instruction.

Differential Revision: https://reviews.llvm.org/D151630

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index f874c63d6cc99..d4a1815719065 100644

--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -296,10 +296,13 @@ class Vectorizer {
 
   /// Tries to compute the offset in bytes PtrB - PtrA.
   std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
+                                         Instruction *ContextInst,
                                          unsigned Depth = 0);
-  std::optional<APInt> gtConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
-                                                    unsigned Depth);
+  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
+                                                     Instruction *ContextInst,
+                                                     unsigned Depth);
   std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
+                                                Instruction *ContextInst,
                                                 unsigned Depth);
 
   /// Gets the element type of the vector that the chain will load or store.
@@ -1160,15 +1163,15 @@ static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
   return false;
 }
 
-std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
-                                                              Value *PtrB,
-                                                              unsigned Depth) {
-  LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs PtrA=" << *PtrA
-                    << " PtrB=" << *PtrB << " Depth=" << Depth << "\n");
+std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
+    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
+  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
+                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
+                    << " Depth=" << Depth << "\n");
   auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
   auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
   if (!GEPA || !GEPB)
-    return getConstantOffsetSelects(PtrA, PtrB, Depth);
+    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);
 
   // Look through GEPs after checking they're the same except for the last
   // index.
@@ -1215,7 +1218,7 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
     return std::nullopt;
   APInt IdxDiff = *IdxDiffRange.getSingleElement();
 
-  LLVM_DEBUG(dbgs() << "LSV: gtConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
+  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                     << "\n");
 
   // Now we need to prove that adding IdxDiff to ValA won't overflow.
@@ -1257,7 +1260,6 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
   if (!Safe) {
     // When computing known bits, use the GEPs as context instructions, since
     // they likely are in the same BB as the load/store.
-    Instruction *ContextInst = GEPA->comesBefore(GEPB) ? GEPB : GEPA;
     KnownBits Known(BitWidth);
     computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
                      ContextInst, &DT);
@@ -1274,8 +1276,8 @@ std::optional<APInt> Vectorizer::gtConstantOffsetComplexAddrs(Value *PtrA,
   return std::nullopt;
 }
 
-std::optional<APInt>
-Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
+std::optional<APInt> Vectorizer::getConstantOffsetSelects(
+    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
   if (Depth++ == MaxDepth)
     return std::nullopt;
 
@@ -1284,13 +1286,15 @@ Vectorizer::getConstantOffsetSelects(Value *PtrA, Value *PtrB, unsigned Depth) {
       if (SelectA->getCondition() != SelectB->getCondition())
         return std::nullopt;
       LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
-                        << ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
+                        << ", PtrB=" << *PtrB << ", ContextInst="
+                        << *ContextInst << ", Depth=" << Depth << "\n");
       std::optional<APInt> TrueDiff = getConstantOffset(
-          SelectA->getTrueValue(), SelectB->getTrueValue(), Depth);
+          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
       if (!TrueDiff.has_value())
         return std::nullopt;
-      std::optional<APInt> FalseDiff = getConstantOffset(
-          SelectA->getFalseValue(), SelectB->getFalseValue(), Depth);
+      std::optional<APInt> FalseDiff =
+          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
+                            ContextInst, Depth);
       if (TrueDiff == FalseDiff)
         return TrueDiff;
     }
@@ -1429,9 +1433,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
     auto ChainIter = MRU.begin();
     for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
          ++J, ++ChainIter) {
-      std::optional<APInt> Offset =
-          getConstantOffset(getLoadStorePointerOperand(ChainIter->first),
-                            getLoadStorePointerOperand(I));
+      std::optional<APInt> Offset = getConstantOffset(
+          getLoadStorePointerOperand(ChainIter->first),
+          getLoadStorePointerOperand(I),
+          /*ContextInst=*/
+          (ChainIter->first->comesBefore(I) ? I : ChainIter->first));
       if (Offset.has_value()) {
         // `Offset` might not have the expected number of bits, if e.g. AS has a
         // 
diff erent number of bits than opaque pointers.
@@ -1464,9 +1470,11 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
 }
 
 std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
+                                                   Instruction *ContextInst,
                                                    unsigned Depth) {
   LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
-                    << ", PtrB=" << *PtrB << ", Depth=" << Depth << "\n");
+                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
+                    << ", Depth=" << Depth << "\n");
   unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
   APInt OffsetA(OffsetBitWidth, 0);
   APInt OffsetB(OffsetBitWidth, 0);
@@ -1495,7 +1503,8 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
     if (DistRange.isSingleElement())
       return OffsetB - OffsetA + *DistRange.getSingleElement();
   }
-  std::optional<APInt> Diff = gtConstantOffsetComplexAddrs(PtrA, PtrB, Depth);
+  std::optional<APInt> Diff =
+      getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
   if (Diff.has_value())
     return OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth());
   return std::nullopt;

diff  --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
index d6eac209ac682..e5804080fb4cd 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll
@@ -118,3 +118,22 @@ define void @sexted_i1_gep_index(ptr addrspace(1) %p, i32 %val) {
   %val1 = load i32, ptr addrspace(1) %gep.1
   ret void
 }
+
+; CHECK-LABEL: @zexted_i1_gep_index_
diff erent_bbs
+; CHECK: load i32
+; CHECK: load i32
+define void @zexted_i1_gep_index_
diff erent_bbs(ptr addrspace(1) %p, i32 %val) {
+entry:
+  %selector = icmp eq i32 %val, 0
+  %flipped = xor i1 %selector, 1
+  %index.0 = zext i1 %selector to i64
+  %index.1 = zext i1 %flipped to i64
+  %gep.0 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.0
+  br label %next
+
+next:
+  %gep.1 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %index.1
+  %val0 = load i32, ptr addrspace(1) %gep.0
+  %val1 = load i32, ptr addrspace(1) %gep.1
+  ret void
+}