[clang] [llvm] [BasicAA] Do not decompose past casts with different index width (PR #119365)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 06:44:55 PST 2024


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/119365

From 361b821793862e80ca61df906e7d914fd5bd2fc1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 10 Dec 2024 12:51:03 +0100
Subject: [PATCH 1/2] [BasicAA] Do not decompose past casts with different
 index width

BasicAA currently tries to support addrspacecasts that change the
index width by performing the decomposition in the maximum of all
index widths and then fixing this up with in-place sign extensions
to get correct overflow behavior when the actual index width is
smaller.
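
For illustration, that fixup amounts to the following for a 32-bit
index held in a 64-bit APInt (a minimal sketch, not the actual
helper; compare the removed adjustToIndexSize below):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  APInt Offset(64, 0x80000001); // i32 index u0x80000001, widened to 64 bits
  Offset <<= 32;                // move the 32-bit value into the high half
  Offset.ashrInPlace(32);       // re-sign-extend from bit 31:
                                // 0xFFFFFFFF80000001, i.e. -0x7fffffff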

However, even when we don't mix different index widths and merely
have an index width that is smaller than the maximum, the behavior
is incorrect (see test), because we only perform the index width
adjustment during decomposition and not in any of the later logic --
and we don't do anything at all for variable offsets. I'm sure the
case where we actually mix different index widths is even more
broken than that.
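
Concretely, in the test below each constant offset is correctly
sign-extended during decomposition -- u0x7fffffff and u0x80000001
become 0x7fffffff and -0x7fffffff in 64 bits -- but the later logic
then computes their distance in 64 bits, where it is 0xfffffffe.
Modulo 2^32 the distance is actually 2, so the two i32 accesses
partially overlap, yet the huge 64-bit distance makes BasicAA
report NoAlias.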

Fix this by not allowing decomposition through index width
changes. If the pointers have different index widths, fall back
to a base object comparison, ignoring the offsets.
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp      | 65 ++++++++-----------
 .../BasicAA/smaller-index-size-overflow.ll    |  3 +-
 2 files changed, 27 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index dc54d566e55b12..b2a3f3390e000d 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -494,20 +494,6 @@ static LinearExpression GetLinearExpression(
   return Val;
 }
 
-/// To ensure a pointer offset fits in an integer of size IndexSize
-/// (in bits) when that size is smaller than the maximum index size. This is
-/// an issue, for example, in particular for 32b pointers with negative indices
-/// that rely on two's complement wrap-arounds for precise alias information
-/// where the maximum index size is 64b.
-static void adjustToIndexSize(APInt &Offset, unsigned IndexSize) {
-  assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!");
-  unsigned ShiftBits = Offset.getBitWidth() - IndexSize;
-  if (ShiftBits != 0) {
-    Offset <<= ShiftBits;
-    Offset.ashrInPlace(ShiftBits);
-  }
-}
-
 namespace {
 // A linear transformation of a Value; this class represents
 // ZExt(SExt(Trunc(V, TruncBits), SExtBits), ZExtBits) * Scale.
@@ -594,9 +580,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
   SearchTimes++;
   const Instruction *CxtI = dyn_cast<Instruction>(V);
 
-  unsigned MaxIndexSize = DL.getMaxIndexSizeInBits();
+  unsigned IndexSize = DL.getIndexTypeSizeInBits(V->getType());
   DecomposedGEP Decomposed;
-  Decomposed.Offset = APInt(MaxIndexSize, 0);
+  Decomposed.Offset = APInt(IndexSize, 0);
   do {
     // See if this is a bitcast or GEP.
     const Operator *Op = dyn_cast<Operator>(V);
@@ -614,7 +600,14 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
     if (Op->getOpcode() == Instruction::BitCast ||
         Op->getOpcode() == Instruction::AddrSpaceCast) {
-      V = Op->getOperand(0);
+      Value *NewV = Op->getOperand(0);
+      // Don't look through casts between address spaces with differing index
+      // widths.
+      if (DL.getIndexTypeSizeInBits(NewV->getType()) != IndexSize) {
+        Decomposed.Base = V;
+        return Decomposed;
+      }
+      V = NewV;
       continue;
     }
 
@@ -651,12 +644,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
     assert(GEPOp->getSourceElementType()->isSized() && "GEP must be sized");
 
-    unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
-    unsigned IndexSize = DL.getIndexSizeInBits(AS);
-    // Assume all GEP operands are constants until proven otherwise.
-    bool GepHasConstantOffset = true;
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
@@ -684,7 +673,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         }
 
         Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
+                             CIdx->getValue().sextOrTrunc(IndexSize);
         continue;
       }
 
@@ -694,8 +683,6 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         return Decomposed;
       }
 
-      GepHasConstantOffset = false;
-
       // If the integer type is smaller than the index size, it is implicitly
       // sign extended or truncated to index size.
       bool NUSW = GEPOp->hasNoUnsignedSignedWrap();
@@ -710,8 +697,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       // Scale by the type size.
       unsigned TypeSize = AllocTypeSize.getFixedValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), NUW, NUSW);
-      Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
-      APInt Scale = LE.Scale.sext(MaxIndexSize);
+      Decomposed.Offset += LE.Offset;
+      APInt Scale = LE.Scale;
       if (!LE.IsNUW)
         Decomposed.NWFlags = Decomposed.NWFlags.withoutNoUnsignedWrap();
 
@@ -731,10 +718,6 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         }
       }
 
-      // Make sure that we have a scale that makes sense for this target's
-      // index size.
-      adjustToIndexSize(Scale, IndexSize);
-
       if (!!Scale) {
         VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
                                   /* IsNegated */ false};
@@ -742,10 +725,6 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
     }
 
-    // Take care of wrap-arounds
-    if (GepHasConstantOffset)
-      adjustToIndexSize(Decomposed.Offset, IndexSize);
-
     // Analyze the base pointer next.
     V = GEPOp->getOperand(0);
   } while (--MaxLookup);
@@ -1084,6 +1063,14 @@ AliasResult BasicAAResult::aliasGEP(
     const GEPOperator *GEP1, LocationSize V1Size,
     const Value *V2, LocationSize V2Size,
     const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
+  auto BaseObjectsAlias = [&]() {
+    AliasResult BaseAlias =
+        AAQI.AAR.alias(MemoryLocation::getBeforeOrAfter(UnderlyingV1),
+                       MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
+    return BaseAlias == AliasResult::NoAlias ? AliasResult::NoAlias
+                                             : AliasResult::MayAlias;
+  };
+
   if (!V1Size.hasValue() && !V2Size.hasValue()) {
     // TODO: This limitation exists for compile-time reasons. Relax it if we
     // can avoid exponential pathological cases.
@@ -1092,11 +1079,7 @@ AliasResult BasicAAResult::aliasGEP(
 
     // If both accesses have unknown size, we can only check whether the base
     // objects don't alias.
-    AliasResult BaseAlias =
-        AAQI.AAR.alias(MemoryLocation::getBeforeOrAfter(UnderlyingV1),
-                       MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
-    return BaseAlias == AliasResult::NoAlias ? AliasResult::NoAlias
-                                             : AliasResult::MayAlias;
+    return BaseObjectsAlias();
   }
 
   DominatorTree *DT = getDT(AAQI);
@@ -1107,6 +1090,10 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
     return AliasResult::MayAlias;
 
+  // Fall back to base objects if pointers have different index widths.
+  if (DecompGEP1.Offset.getBitWidth() != DecompGEP2.Offset.getBitWidth())
+    return BaseObjectsAlias();
+
   // Swap GEP1 and GEP2 if GEP2 has more variable indices.
   if (DecompGEP1.VarIndices.size() < DecompGEP2.VarIndices.size()) {
     std::swap(DecompGEP1, DecompGEP2);
diff --git a/llvm/test/Analysis/BasicAA/smaller-index-size-overflow.ll b/llvm/test/Analysis/BasicAA/smaller-index-size-overflow.ll
index a913a4a9e4b1c8..fc91b577a56cc3 100644
--- a/llvm/test/Analysis/BasicAA/smaller-index-size-overflow.ll
+++ b/llvm/test/Analysis/BasicAA/smaller-index-size-overflow.ll
@@ -2,8 +2,7 @@
 
 target datalayout = "p1:32:32"
 
-; FIXME: This is a miscompile.
-; CHECK: NoAlias:	i32 addrspace(1)* %gep1, i32 addrspace(1)* %gep2
+; CHECK: PartialAlias:	i32 addrspace(1)* %gep1, i32 addrspace(1)* %gep2
 define void @test(ptr addrspace(1) %p) {
   %gep1 = getelementptr i8, ptr addrspace(1) %p, i32 u0x7fffffff
   %gep2 = getelementptr i8, ptr addrspace(1) %p, i32 u0x80000001

From b250aafde14ebf3dbb7de781d3e12c352b6e322a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 10 Dec 2024 15:44:34 +0100
Subject: [PATCH 2/2] Update test
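
On z/OS, __ptr32 pointers use an address space with a 32-bit index
width, so BasicAA now refuses to decompose through the cast between
the 32-bit and 64-bit pointer representations and conservatively
answers MayAlias. This presumably blocks the store-to-load forwarding
that previously let the result constant-fold to 135, so the function
now returns the computed %add20 instead.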

---
 clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes-malloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes-malloc.c b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes-malloc.c
index 1b05e8aa5052af..c0ca3dab512584 100644
--- a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes-malloc.c
+++ b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes-malloc.c
@@ -4,7 +4,7 @@ void *__malloc31(size_t);
 
 int test_1() {
   // X64-LABEL: define {{.*}} i32 @test_1()
-  // X64: ret i32 135
+  // X64: ret i32 %add20
   int *__ptr32 a;
   int *b;
   int i;


