[clang] 7954c57 - [IR] Fix GEP offset computations for vector GEPs (#75448)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 4 01:08:27 PST 2024


Author: Jannik Silvanus
Date: 2024-01-04T10:08:21+01:00
New Revision: 7954c57124b495fbdc73674d71f2e366e4afe522

URL: https://github.com/llvm/llvm-project/commit/7954c57124b495fbdc73674d71f2e366e4afe522
DIFF: https://github.com/llvm/llvm-project/commit/7954c57124b495fbdc73674d71f2e366e4afe522.diff

LOG: [IR] Fix GEP offset computations for vector GEPs (#75448)

Vectors are always bit-packed and, unlike arrays, don't respect the
elements' alignment requirements. Offsets of vector GEPs therefore need
to be computed differently from offsets of array GEPs.
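
For example (hypothetical IR; `%p`, `%v.gep`, `%a.gep` are illustrative
names), under a datalayout where `half` is overaligned to 32 bits
(`f16:32`, as in the new test below), the offsets with this patch are:

    target datalayout = "e-f16:32"

    ; vector: elements are bit-packed; stride = store size = 2 bytes
    %v.gep = getelementptr <2 x half>, ptr %p, i64 0, i64 1  ; byte offset 2
    ; array: stride = alloc size = 4 bytes (ABI alignment of half)
    %a.gep = getelementptr [2 x half], ptr %p, i64 0, i64 1  ; byte offset 4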

This PR fixes many places that used the incorrect pattern of always
computing the stride as `DL.getTypeAllocSize(GTI.getIndexedType())`.
These uses are replaced with `GTI.getSequentialElementStride(DL)`,
a new helper function added in this PR.
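
Schematically, call sites change as follows (a generic sketch, not any
one call site from the diffs below; `DL` and `GTI` as in those diffs):

    // Old: alloc size of the element type -- the wrong stride for
    // vectors with overaligned or non-byte-sized elements.
    TypeSize OldStride = DL.getTypeAllocSize(GTI.getIndexedType());
    // New: store size for vector elements, alloc size for array elements.
    TypeSize NewStride = GTI.getSequentialElementStride(DL);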

This changes behavior for GEPs into vectors with element types whose
(bit) size and alloc size differ. This covers two cases:

* Types with a bit size that is not a multiple of a byte, e.g. i1.
  GEPs into such vectors are questionable to begin with, as some
  elements are not even addressable (sketched below).
* Overaligned types, e.g. i16 with 32-bit alignment (as in the `half`
  example above).
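
For the first case, a sketch of why such GEPs are questionable
(hypothetical IR):

    ; Element 2 of <8 x i1> lives at bit offset 2, which is not a whole
    ; byte, so no byte stride can address it. The new helper accordingly
    ; asserts that vector elements are byte-addressable.
    %q = getelementptr <8 x i1>, ptr %p, i64 0, i64 2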

Existing tests are unaffected; a newly added test exposed a miscompilation
that is now fixed.

---------

Co-authored-by: Nikita Popov <github at npopov.com>

Added: 
    

Modified: 
    clang/lib/CodeGen/CGExprScalar.cpp
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/IR/GetElementPtrTypeIterator.h
    llvm/lib/Analysis/BasicAliasAnalysis.cpp
    llvm/lib/Analysis/InlineCost.cpp
    llvm/lib/Analysis/Local.cpp
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
    llvm/lib/IR/DataLayout.cpp
    llvm/lib/IR/Operator.cpp
    llvm/lib/IR/Value.cpp
    llvm/lib/Target/AArch64/AArch64FastISel.cpp
    llvm/lib/Target/ARM/ARMFastISel.cpp
    llvm/lib/Target/Mips/MipsFastISel.cpp
    llvm/lib/Target/PowerPC/PPCFastISel.cpp
    llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
    llvm/lib/Target/X86/X86FastISel.cpp
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
    llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
    llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
    llvm/test/Transforms/InstCombine/getelementptr.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 6adf99531e30e3..d2c4c7ee50bc89 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5292,8 +5292,8 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
     } else {
       // Otherwise this is array-like indexing. The local offset is the index
       // multiplied by the element size.
-      auto *ElementSize = llvm::ConstantInt::get(
-          IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType()));
+      auto *ElementSize =
+          llvm::ConstantInt::get(IntPtrTy, GTI.getSequentialElementStride(DL));
       auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true);
       LocalOffset = eval(BO_Mul, ElementSize, IndexS);
     }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7ad3ce512a3552..2be7256423e422 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1048,7 +1048,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
         if (TargetType->isScalableTy())
           return TTI::TCC_Basic;
         int64_t ElementSize =
-            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
+            GTI.getSequentialElementStride(DL).getFixedValue();
         if (ConstIdx) {
           BaseOffset +=
               ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;

diff  --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index f3272327c3f8b2..1092b636e023a2 100644
--- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/User.h"
@@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator>
 class generic_gep_type_iterator {
 
   ItTy OpIt;
-  PointerUnion<StructType *, Type *> CurTy;
+  // We use two different mechanisms to store the type a GEP index applies to.
+  // In some cases, we need to know the outer aggregate type the index is
+  // applied within, e.g. a struct. In such cases, we store the aggregate type
+  // in the iterator, and derive the element type on the fly.
+  //
+  // However, this is not always possible, because for the outermost index there
+  // is no containing type. In such cases, or if the containing type is not
+  // relevant, e.g. for arrays, the element type is stored as Type* in CurTy.
+  //
+  // If CurTy contains a Type* value, this does not imply anything about the
+  // type itself, because it is the element type and not the outer type.
+  // In particular, Type* can be a struct type.
+  //
+  // Consider this example:
+  //
+  //    %my.struct = type { i32, [ 4 x float ] }
+  //    [...]
+  //    %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, i32 3
+  //
+  // Iterating over the indices of this GEP, CurTy will contain the following
+  // values:
+  //    * i32 10: The outer index always operates on the GEP value type.
+  //              CurTy contains a Type*       pointing at `%my.struct`.
+  //    * i32 1:  This index is within a struct.
+  //              CurTy contains a StructType* pointing at `%my.struct`.
+  //    * i32 3:  This index is within an array. We reuse the "flat" indexing
+  //              for arrays which is also used in the top level GEP index.
+  //              CurTy contains a Type*       pointing at `float`.
+  //
+  // Vectors are handled separately because the layout of vectors is different
+  // for overaligned elements: Vectors are always bit-packed, whereas arrays
+  // respect ABI alignment of the elements.
+  PointerUnion<StructType *, VectorType *, Type *> CurTy;
 
   generic_gep_type_iterator() = default;
 
@@ -69,6 +102,8 @@ class generic_gep_type_iterator {
   Type *getIndexedType() const {
     if (auto *T = dyn_cast_if_present<Type *>(CurTy))
       return T;
+    if (auto *VT = dyn_cast_if_present<VectorType *>(CurTy))
+      return VT->getElementType();
     return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand());
   }
 
@@ -79,7 +114,7 @@ class generic_gep_type_iterator {
     if (auto *ATy = dyn_cast<ArrayType>(Ty))
       CurTy = ATy->getElementType();
     else if (auto *VTy = dyn_cast<VectorType>(Ty))
-      CurTy = VTy->getElementType();
+      CurTy = VTy;
     else
       CurTy = dyn_cast<StructType>(Ty);
     ++OpIt;
@@ -108,7 +143,23 @@ class generic_gep_type_iterator {
   // that.
 
   bool isStruct() const { return isa<StructType *>(CurTy); }
-  bool isSequential() const { return isa<Type *>(CurTy); }
+  bool isVector() const { return isa<VectorType *>(CurTy); }
+  bool isSequential() const { return !isStruct(); }
+
+  // For sequential GEP indices (all except those into structs), the index value
+  // can be translated into a byte offset by multiplying with an element stride.
+  // This function returns this stride, which both depends on the element type,
+  // and the containing aggregate type, as vectors always tightly bit-pack their
+  // elements.
+  TypeSize getSequentialElementStride(const DataLayout &DL) const {
+    assert(isSequential());
+    Type *ElemTy = getIndexedType();
+    if (isVector()) {
+      assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable");
+      return DL.getTypeStoreSize(ElemTy);
+    }
+    return DL.getTypeAllocSize(ElemTy);
+  }
 
   StructType *getStructType() const { return cast<StructType *>(CurTy); }
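
For illustration, a typical consumer of the new helper looks roughly like
this (a minimal sketch, not a call site from this patch; it assumes a GEP
whose indices are all constants and fixed-size types only;
`constantGEPOffset` is a hypothetical helper name):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GetElementPtrTypeIterator.h"
    #include "llvm/IR/Operator.h"

    using namespace llvm;

    // Accumulate the byte offset of a GEP with all-constant indices.
    static int64_t constantGEPOffset(const DataLayout &DL,
                                     const GEPOperator &GEP) {
      int64_t Offset = 0;
      for (gep_type_iterator GTI = gep_type_begin(&GEP),
                             E = gep_type_end(&GEP);
           GTI != E; ++GTI) {
        auto *CI = cast<ConstantInt>(GTI.getOperand());
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // Struct field: offset comes from the struct layout.
          Offset +=
              DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
        } else {
          // Array or vector: index times element stride. For vectors this
          // is now the bit-packed store size rather than the alloc size.
          Offset += CI->getSExtValue() *
                    int64_t(GTI.getSequentialElementStride(DL).getFixedValue());
        }
      }
      return Offset;
    }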
 

diff  --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 97f60d28e49917..9eb7e914687cef 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           continue;
 
         // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+        TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
         if (AllocTypeSize.isScalable()) {
           Decomposed.Base = V;
           return Decomposed;
@@ -650,7 +650,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
       if (AllocTypeSize.isScalable()) {
         Decomposed.Base = V;
         return Decomposed;

diff  --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7096e06d925ade..1fa7badaa4fa01 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
       continue;
     }
 
-    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+    APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
   return true;

diff  --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp
index 30757abeb09802..f5e080d2c78e65 100644
--- a/llvm/lib/Analysis/Local.cpp
+++ b/llvm/lib/Analysis/Local.cpp
@@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
     // Convert to correct type.
     if (Op->getType() != IntIdxTy)
       Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
-    TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize TSize = GTI.getSequentialElementStride(DL);
     if (TSize != TypeSize::getFixed(1)) {
       Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
       if (IntIdxTy->isVectorTy())

diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 89666018d92514..aed60cc5a3f5ef 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
 
     // If it's a type with the same allocation size as the result of the GEP we
     // can peel off the zero index.
-    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
+    TypeSize ElemSize = GEPTI.isStruct()
+                            ? DL.getTypeAllocSize(GEPTI.getIndexedType())
+                            : GEPTI.getSequentialElementStride(DL);
+    if (ElemSize != GEPAllocSize)
       break;
     --LastOperand;
   }

diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 16d78c1ded6d7a..439127e5c95400 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1196,7 +1196,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
       unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
       KnownBits IndexBits(IndexBitWidth);
       computeKnownBits(Index, IndexBits, Depth + 1, Q);
-      TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+      TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
       uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
       KnownBits ScalingFactor(IndexBitWidth);
       // Multiply by current sizeof type.
@@ -2128,7 +2128,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
     }
 
     // If we have a zero-sized type, the index doesn't matter. Keep looping.
-    if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
+    if (GTI.getSequentialElementStride(Q.DL).isZero())
       continue;
 
     // Fast path the constant operand case both for efficiency and so we don't

diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6e99fb133e26a9..5bd4c6b067d796 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4776,7 +4776,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
             cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
         ConstantOffset += SL->getElementOffset(Idx);
       } else {
-        TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
+        TypeSize TS = GTI.getSequentialElementStride(DL);
         if (TS.isNonZero()) {
           // The optimisations below currently only work for fixed offsets.
           if (TS.isScalable())

diff  --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bea29642cd0031..9c11113902a24e 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1545,7 +1545,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
       Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
       continue;
     } else {
-      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
 
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.

diff  --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f3d8edb8926b66..09e260394262bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) {
         }
       }
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero())
           continue;
         // N = N + Offset
         uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
-        TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+        TotalOffs += GTI.getSequentialElementStride(DL) * IdxN;
         if (TotalOffs >= MaxOffs) {
           N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT);
           if (!N) // Unhandled operand. Halt "fast" selection and bail.
@@ -585,7 +583,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
       }
 
       // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
       Register IdxN = getRegForGEPIndex(Idx);
       if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
         return false;

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 192f7bc8d2aa1b..78ebd2d33459a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4114,7 +4114,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
       MVT IdxTy = MVT::getIntegerVT(IdxSize);
       TypeSize ElementSize =
-          DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
+          GTI.getSequentialElementStride(DAG.getDataLayout());
       // We intentionally mask away the high bits here; ElementSize may not
       // fit in IdxTy.
       APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());

diff  --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 770fc93490835d..ae978070ac9f90 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
         assert(BitWidth == 64 && "Invalid index type for getelementptr");
         Idx = (int64_t)IdxGV.IntVal.getZExtValue();
       }
-      Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
+      Total += I.getSequentialElementStride(getDataLayout()) * Idx;
     }
   }
 

diff  --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index e28f043cf9e0d0..a2f5714c706874 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
       // Add in the offset, as calculated by the structure layout info...
       Result += Layout->getElementOffset(FieldNo);
     } else {
-      // Get the array index and the size of each array element.
-      if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
-        Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
+      if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
+        Result += ArrayIdx * GTI.getSequentialElementStride(*this);
     }
   }
 

diff  --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index cd982c7da102af..16a89534b4b3ec 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -87,7 +87,7 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
       /// If the index isn't known, we take 1 because it is the index that will
       /// give the worse alignment of the offset.
       const uint64_t ElemCount = OpC ? OpC->getZExtValue() : 1;
-      Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount;
+      Offset = GTI.getSequentialElementStride(DL) * ElemCount;
     }
     Result = Align(MinAlign(Offset, Result.value()));
   }
@@ -157,7 +157,7 @@ bool GEPOperator::accumulateConstantOffset(
         continue;
       }
       if (!AccumulateOffset(ConstOffset->getValue(),
-                            DL.getTypeAllocSize(GTI.getIndexedType())))
+                            GTI.getSequentialElementStride(DL)))
         return false;
       continue;
     }
@@ -170,8 +170,7 @@ bool GEPOperator::accumulateConstantOffset(
     if (!ExternalAnalysis(*V, AnalysisIndex))
       return false;
     UsedExternalAnalysis = true;
-    if (!AccumulateOffset(AnalysisIndex,
-                          DL.getTypeAllocSize(GTI.getIndexedType())))
+    if (!AccumulateOffset(AnalysisIndex, GTI.getSequentialElementStride(DL)))
       return false;
   }
   return true;
@@ -218,14 +217,13 @@ bool GEPOperator::collectOffset(
         continue;
       }
       CollectConstantOffset(ConstOffset->getValue(),
-                            DL.getTypeAllocSize(GTI.getIndexedType()));
+                            GTI.getSequentialElementStride(DL));
       continue;
     }
 
     if (STy || ScalableType)
       return false;
-    APInt IndexedSize =
-        APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+    APInt IndexedSize = APInt(BitWidth, GTI.getSequentialElementStride(DL));
     // Insert an initial offset of 0 for V iff none exists already, then
     // increment the offset by IndexedSize.
     if (!IndexedSize.isZero()) {

diff  --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b6e25c46b514d8..94b0ae7435c949 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1015,7 +1015,7 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
 
     // Otherwise, we have a sequential type like an array or fixed-length
     // vector. Multiply the index by the ElementSize.
-    TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+    TypeSize Size = GTI.getSequentialElementStride(DL);
     if (Size.isScalable())
       return std::nullopt;
     Offset += Size.getFixedValue() * OpC->getSExtValue();

diff  --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 1ea63a5d6ec08d..e98f6c4984a752 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -645,7 +645,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
         TmpOffset += SL->getElementOffset(Idx);
       } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
         while (true) {
           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.
@@ -4978,15 +4978,13 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
       if (Field)
         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
     } else {
-      Type *Ty = GTI.getIndexedType();
-
       // If this is a constant subscript, handle it quickly.
       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero())
           continue;
         // N = N + Offset
-        TotalOffs +=
-            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+        TotalOffs += GTI.getSequentialElementStride(DL) *
+                     cast<ConstantInt>(CI)->getSExtValue();
         continue;
       }
       if (TotalOffs) {
@@ -4997,7 +4995,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
       }
 
       // N = N + Idx * ElementSize;
-      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
       unsigned IdxN = getRegForGEPIndex(Idx);
       if (!IdxN)
         return false;

diff  --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 1d6aaeb7433b0f..cb3a709f7003bd 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -747,7 +747,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
           TmpOffset += SL->getElementOffset(Idx);
         } else {
-          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+          uint64_t S = GTI.getSequentialElementStride(DL);
           while (true) {
             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
               // Constant-offset addressing.

diff  --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 7fcf375aa10b69..192ed1cec79a84 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -492,7 +492,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
         TmpOffset += SL->getElementOffset(Idx);
       } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
         while (true) {
           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.

diff  --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 42f5a4e624c494..56af80f9cedee8 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -350,7 +350,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
           TmpOffset += SL->getElementOffset(Idx);
         } else {
-          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+          uint64_t S = GTI.getSequentialElementStride(DL);
           for (;;) {
             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
               // Constant-offset addressing.

diff  --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 5ad1e082344e77..1129206800ad36 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -362,7 +362,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
 
     VecOperand = i;
 
-    TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+    TypeSize TS = GTI.getSequentialElementStride(*DL);
     if (TS.isScalable())
       return std::make_pair(nullptr, nullptr);
 

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 37abbb072cdd38..15dc44a0439573 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -278,7 +278,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
         TmpOffset += SL->getElementOffset(Idx);
       } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        uint64_t S = GTI.getSequentialElementStride(DL);
         for (;;) {
           if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.

diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a72..c789ac82ac9433 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -916,7 +916,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
 
       // A array/variable index is always of the form i*S where S is the
       // constant scale size.  See if we can push the scale into immediates.
-      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+      uint64_t S = GTI.getSequentialElementStride(DL);
       for (;;) {
         if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
           // Constant-offset addressing.

diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 656abdb0abbffa..75cddfa16d6db5 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1097,10 +1097,8 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
           // For array or vector indices, scale the index by the size of the
           // type.
           APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
-          GEPOffset +=
-              Index *
-              APInt(Offset.getBitWidth(),
-                    DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());
+          GEPOffset += Index * APInt(Offset.getBitWidth(),
+                                     GTI.getSequentialElementStride(DL));
         }
 
         // If this index has computed an intermediate pointer which is not

diff  --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index b8c9d9d100f117..225dd454068c84 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -843,7 +843,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
         // constant offset to a byte offset, and later offset the remainder of
         // the original GEP with this byte offset.
         AccumulativeByteOffset +=
-            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+            ConstantOffset * GTI.getSequentialElementStride(*DL);
       }
     } else if (LowerGEP) {
       StructType *StTy = GTI.getStructType();
@@ -884,7 +884,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
           continue;
 
       APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
-                                DL->getTypeAllocSize(GTI.getIndexedType()));
+                                GTI.getSequentialElementStride(*DL));
       // Scale the index by element size.
       if (ElementSize != 1) {
         if (ElementSize.isPowerOf2()) {
@@ -946,7 +946,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
           continue;
 
       APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
-                                DL->getTypeAllocSize(GTI.getIndexedType()));
+                                GTI.getSequentialElementStride(*DL));
       // Scale the index by element size.
       if (ElementSize != 1) {
         if (ElementSize.isPowerOf2()) {

diff  --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 543469d62fe732..ca1f3a0c0ae342 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -547,7 +547,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
     // indices except this current one.
     const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
     Value *ArrayIdx = GEP->getOperand(I);
-    uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+    uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
     if (ArrayIdx->getType()->getIntegerBitWidth() <=
         DL->getIndexSizeInBits(GEP->getAddressSpace())) {
       // Skip factoring if ArrayIdx is wider than the index size, because

diff  --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index fa2459d1ca0287..1f11d4894f775c 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1193,7 +1193,7 @@ std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
       OpA->getType() != OpB->getType())
     return std::nullopt;
 
-  uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
+  uint64_t Stride = GTIA.getSequentialElementStride(DL);
 
   // Only look through a ZExt/SExt.
   if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))

diff  --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 373b7f5f2fc0a5..642c3eb2a0e41b 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
-target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64-f16:32"
 
 %intstruct = type { i32 }
 %pair = type { i32, i32 }
@@ -111,6 +111,16 @@ define void @test_evaluate_gep_as_ptrs_array(ptr addrspace(2) %B) {
   ret void
 }
 
+; This should be turned into a constexpr instead of being an instruction
+define void @test_overaligned_vec(i8 %B) {
+; CHECK-LABEL: @test_overaligned_vec(
+; CHECK-NEXT:    store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1
+; CHECK-NEXT:    ret void
+  %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1
+  store i8 %B, ptr %A
+  ret void
+}
+
 define ptr @test7(ptr %I, i64 %C, i64 %D) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[I:%.*]], i64 [[C:%.*]]
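
A closing note on the new test above: with `f16:32` in the datalayout,
`half` has a 2-byte store size but a 4-byte alloc size, so

    %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1

folds to byte offset 2 (the bit-packed store size), which is what the
CHECK line's `i64 2` verifies. Before this patch the offset was computed
from the 4-byte alloc size instead, the miscompilation mentioned in the log.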


        

