[llvm] 420cf69 - [LSV] Return same bitwidth from getConstantOffset.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Mon May 29 08:44:16 PDT 2023


Author: Justin Lebar
Date: 2023-05-29T08:43:47-07:00
New Revision: 420cf6927c35449f234549389e6ce18371cdda24

URL: https://github.com/llvm/llvm-project/commit/420cf6927c35449f234549389e6ce18371cdda24
DIFF: https://github.com/llvm/llvm-project/commit/420cf6927c35449f234549389e6ce18371cdda24.diff

LOG: [LSV] Return same bitwidth from getConstantOffset.

Previously, getConstantOffset could return an APInt with a different
bitwidth than the input pointers.  For example, we might be loading an
opaque 64-bit pointer, but stripAndAccumulateInBoundsConstantOffsets
might give a 32-bit offset.

This was OK in most cases because in gatherChains, we casted the APInt
back to the original ASPtrBits.

But it was not OK when considering selects.  We'd call getConstantOffset
twice and compare the resulting APInt's, which might not have the same
bit width.

This fixes that.  Now getConstantOffset always returns offsets with the
correct width, so we don't need the hack of casting it in gatherChains,
and it works correctly when we're handling selects.

Differential Revision: https://reviews.llvm.org/D151640

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index d4a1815719065..043892c799074 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1441,8 +1441,7 @@ std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
       if (Offset.has_value()) {
         // `Offset` might not have the expected number of bits, if e.g. AS has a
        // different number of bits than opaque pointers.
-        ChainIter->second.push_back(
-            ChainElem{I, Offset.value().sextOrTrunc(ASPtrBits)});
+        ChainIter->second.push_back(ChainElem{I, Offset.value()});
         // Move ChainIter to the front of the MRU list.
         MRU.remove(*ChainIter);
         MRU.push_front(*ChainIter);
@@ -1475,9 +1474,11 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
   LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                     << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                     << ", Depth=" << Depth << "\n");
-  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
-  APInt OffsetA(OffsetBitWidth, 0);
-  APInt OffsetB(OffsetBitWidth, 0);
+  // We'll ultimately return a value of this bit width, even if computations
+  // happen in a different width.
+  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
+  APInt OffsetA(OrigBitWidth, 0);
+  APInt OffsetB(OrigBitWidth, 0);
   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
   unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
@@ -1493,7 +1494,7 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
   OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
   OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
   if (PtrA == PtrB)
-    return OffsetB - OffsetA;
+    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
 
   // Try to compute B - A.
   const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
@@ -1501,11 +1502,13 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
     LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
     ConstantRange DistRange = SE.getSignedRange(DistScev);
     if (DistRange.isSingleElement())
-      return OffsetB - OffsetA + *DistRange.getSingleElement();
+      return (OffsetB - OffsetA + *DistRange.getSingleElement())
+          .sextOrTrunc(OrigBitWidth);
   }
   std::optional<APInt> Diff =
       getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
   if (Diff.has_value())
-    return OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth());
+    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
+        .sextOrTrunc(OrigBitWidth);
   return std::nullopt;
 }

diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
index f3575e5edd764..aec5bca3b6fd2 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll
@@ -62,6 +62,23 @@ entry:
   unreachable
 }
 
+; CHECK-LABEL: @select_different_as
+; CHECK: load <2 x i32>
+define void @select_different_as(ptr addrspace(1) %p0, ptr addrspace(5) %q0, i1 %cond) {
+entry:
+  %p1 = getelementptr inbounds i32, ptr addrspace(1) %p0, i64 1
+  %q1 = getelementptr inbounds i32, ptr addrspace(5) %q0, i64 1
+  %p0.ascast = addrspacecast ptr addrspace(1) %p0 to ptr
+  %p1.ascast = addrspacecast ptr addrspace(1) %p1 to ptr
+  %q0.ascast = addrspacecast ptr addrspace(5) %q0 to ptr
+  %q1.ascast = addrspacecast ptr addrspace(5) %q1 to ptr
+  %sel0 = select i1 %cond, ptr %p0.ascast, ptr %q0.ascast
+  %sel1 = select i1 %cond, ptr %p1.ascast, ptr %q1.ascast
+  %tmp1 = load i32, ptr %sel0, align 8
+  %tmp2 = load i32, ptr %sel1, align 8
+  unreachable
+}
+
 ; CHECK-LABEL: @shrink_ptr
 ; CHECK: load <2 x i32>
 define void @shrink_ptr(ptr %p) {


        


More information about the llvm-commits mailing list