[llvm] a8c318b - [BasicAA] Use index size instead of pointer size

Sun Nov 7 09:56:18 PST 2021

Author: Nikita Popov
Date: 2021-11-07T18:56:11+01:00
New Revision: a8c318b50eccc922bc76f22e1791b0420574187d

URL: https://github.com/llvm/llvm-project/commit/a8c318b50eccc922bc76f22e1791b0420574187d
DIFF: https://github.com/llvm/llvm-project/commit/a8c318b50eccc922bc76f22e1791b0420574187d.diff

LOG: [BasicAA] Use index size instead of pointer size

When accumulating the GEP offset in BasicAA, we should use the
pointer index size rather than the pointer size.

Differential Revision: https://reviews.llvm.org/D112370

Added: 
    llvm/test/Analysis/BasicAA/index-size.ll

Modified: 
    llvm/include/llvm/IR/DataLayout.h
    llvm/lib/Analysis/BasicAliasAnalysis.cpp
    llvm/lib/IR/DataLayout.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index e65b128883e99..46acd403bef1c 100644

--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -377,8 +377,8 @@ class DataLayout {
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;
 
-  /// Returns the maximum pointer size over all address spaces.
-  unsigned getMaxPointerSize() const;
+  /// Returns the maximum index size over all address spaces.
+  unsigned getMaxIndexSize() const;
 
   // Index size used for address calculation.
   unsigned getIndexSize(unsigned AS) const;
@@ -410,9 +410,9 @@ class DataLayout {
     return getPointerSize(AS) * 8;
   }
 
-  /// Returns the maximum pointer size over all address spaces.
-  unsigned getMaxPointerSizeInBits() const {
-    return getMaxPointerSize() * 8;
+  /// Returns the maximum index size over all address spaces.
+  unsigned getMaxIndexSizeInBits() const {
+    return getMaxIndexSize() * 8;
   }
 
   /// Size in bits of index used for address calculation in getelementptr.

diff  --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c7a81514d00a1..88b0f37b1d48a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -465,14 +465,14 @@ static LinearExpression GetLinearExpression(
   return Val;
 }
 
-/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// To ensure a pointer offset fits in an integer of size IndexSize
+/// (in bits) when that size is smaller than the maximum index size. This is
 /// an issue, for example, in particular for 32b pointers with negative indices
 /// that rely on two's complement wrap-arounds for precise alias information
-/// where the maximum pointer size is 64b.
-static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) {
-  assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
-  unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+/// where the maximum index size is 64b.
+static APInt adjustToIndexSize(const APInt &Offset, unsigned IndexSize) {
+  assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!");
+  unsigned ShiftBits = Offset.getBitWidth() - IndexSize;
   return (Offset << ShiftBits).ashr(ShiftBits);
 }
 
@@ -549,9 +549,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
   SearchTimes++;
   const Instruction *CxtI = dyn_cast<Instruction>(V);
 
-  unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+  unsigned MaxIndexSize = DL.getMaxIndexSizeInBits();
   DecomposedGEP Decomposed;
-  Decomposed.Offset = APInt(MaxPointerSize, 0);
+  Decomposed.Offset = APInt(MaxIndexSize, 0);
   do {
     // See if this is a bitcast or GEP.
     const Operator *Op = dyn_cast<Operator>(V);
@@ -620,7 +620,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
-    unsigned PointerSize = DL.getPointerSizeInBits(AS);
+    unsigned IndexSize = DL.getIndexSizeInBits(AS);
     // Assume all GEP operands are constants until proven otherwise.
     bool GepHasConstantOffset = true;
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
@@ -643,26 +643,26 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           continue;
         Decomposed.Offset +=
             DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() *
-            CIdx->getValue().sextOrTrunc(MaxPointerSize);
+            CIdx->getValue().sextOrTrunc(MaxIndexSize);
         continue;
       }
 
       GepHasConstantOffset = false;
 
-      // If the integer type is smaller than the pointer size, it is implicitly
-      // sign extended to pointer size.
+      // If the integer type is narrower than the index size, it is implicitly
+      // sign extended to the index size; if it is wider, it is truncated.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      unsigned SExtBits = PointerSize > Width ? PointerSize - Width : 0;
-      unsigned TruncBits = PointerSize < Width ? Width - PointerSize : 0;
+      unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
+      unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
       LinearExpression LE = GetLinearExpression(
           CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
 
       // Scale by the type size.
       unsigned TypeSize =
           DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
-      LE = LE.mul(APInt(PointerSize, TypeSize), GEPOp->isInBounds());
-      Decomposed.Offset += LE.Offset.sextOrSelf(MaxPointerSize);
-      APInt Scale = LE.Scale.sextOrSelf(MaxPointerSize);
+      LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
+      Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize);
+      APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize);
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
@@ -678,8 +678,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // pointer size.
-      Scale = adjustToPointerSize(Scale, PointerSize);
+      // index size.
+      Scale = adjustToIndexSize(Scale, IndexSize);
 
       if (!!Scale) {
         VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW};
@@ -689,7 +689,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
     // Take care of wrap-arounds
     if (GepHasConstantOffset)
-      Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize);
+      Decomposed.Offset = adjustToIndexSize(Decomposed.Offset, IndexSize);
 
     // Analyze the base pointer next.
     V = GEPOp->getOperand(0);
@@ -1258,7 +1258,7 @@ AliasResult BasicAAResult::aliasGEP(
     CR = Index.Val.evaluateWith(CR).sextOrTrunc(OffsetRange.getBitWidth());
 
     assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
-           "Bit widths are normalized to MaxPointerSize");
+           "Bit widths are normalized to MaxIndexSize");
     if (Index.IsNSW)
       OffsetRange = OffsetRange.add(CR.smul_sat(ConstantRange(Scale)));
     else

diff  --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 5edff7a741362..2ace180482628 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -707,12 +707,12 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
   return getPointerAlignElem(AS).TypeByteWidth;
 }
 
-unsigned DataLayout::getMaxPointerSize() const {
-  unsigned MaxPointerSize = 0;
+unsigned DataLayout::getMaxIndexSize() const {
+  unsigned MaxIndexSize = 0;
   for (auto &P : Pointers)
-    MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+    MaxIndexSize = std::max(MaxIndexSize, P.IndexWidth);
 
-  return MaxPointerSize;
+  return MaxIndexSize;
 }
 
 unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 3851300f615e7..0d3f516932618 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -506,7 +506,7 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
   // This is an "architecture", not a "feature", but we emit it as such for
   // the benefit of tools like Binaryen and consistency with other producers.
   // FIXME: Subtarget is null here, so can't Subtarget->hasAddr64() ?
-  if (M.getDataLayout().getMaxPointerSize() == 8) {
+  if (M.getDataLayout().getPointerSize() == 8) {
     // Can't use EmitFeature since "wasm-feature-memory64" is not a module
     // flag.
     EmittedFeatures.push_back({wasm::WASM_FEATURE_PREFIX_USED, "memory64"});

diff  --git a/llvm/test/Analysis/BasicAA/index-size.ll b/llvm/test/Analysis/BasicAA/index-size.ll
new file mode 100644
index 0000000000000..da365014dd6bb
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/index-size.ll
@@ -0,0 +1,18 @@
+; RUN: opt -basic-aa -aa-eval -print-all-alias-modref-info -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "p:64:64:64:32"
+
+; gep.1 and gep.2 must alias, because their offsets are truncated to the index
+; size (32-bit), not the pointer size (64-bit).
+define void @mustalias_due_to_index_size(i8* %ptr) {
+; CHECK-LABEL: Function: mustalias_due_to_index_size
+; CHECK-NEXT: MustAlias: i8* %gep.1, i8* %ptr
+; CHECK-NEXT: MustAlias: i8* %gep.2, i8* %ptr
+; CHECK-NEXT: MustAlias: i8* %gep.1, i8* %gep.2
+;
+  %gep.1 = getelementptr i8, i8* %ptr, i64 4294967296
+  store i8 0, i8* %gep.1
+  %gep.2 = getelementptr i8, i8* %ptr, i64 0
+  store i8 1, i8* %gep.2
+  ret void
+}