[PATCH] D99642: For non-null pointer checks, do not descend through out-of-bounds GEPs

Momchil Velikov via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 31 01:43:30 PDT 2021


chill created this revision.
Herald added a subscriber: hiraditya.
chill requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

In `LazyValueInfoImpl::isNonNullAtEndOfBlock` we populate a set of
pointers, known to be non-null at the end of a block (e.g. because we
did a load through them). We then infer that any pointer, based on an
element of this set is non-null as well ("based" here meaning a
non-null pointer is the underlying object). This is incorrect, even if
the base pointer was non-null, the value of a GEP, that lacks the
inbounds` attribute, may be `null`.

This issue appeared as miscompilation of the following test case:

  int puts(const char *);
      
  typedef struct iter {
    int *val;
  } iter_t;
      
  static long distance(iter_t first, iter_t last) {
    long r = 0;
    for (; first.val != last.val; first.val++)
      ++r;
    return r;
  }
      
  int main() {
    int arr[2] = {0};
    iter_t i, j;
    i.val = arr;
    j.val = arr + 1;
    if (distance(i, j) >= 2)
      puts("failed");
    else
      puts("passed");
  }

This fixes PR49662.


https://reviews.llvm.org/D99642

Files:
  llvm/include/llvm/Analysis/ValueTracking.h
  llvm/lib/Analysis/LazyValueInfo.cpp
  llvm/lib/Analysis/ValueTracking.cpp
  llvm/test/Analysis/ValueTracking/unknown-nonnull-gep-out-of-bounds.ll


Index: llvm/test/Analysis/ValueTracking/unknown-nonnull-gep-out-of-bounds.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ValueTracking/unknown-nonnull-gep-out-of-bounds.ll
@@ -0,0 +1,35 @@
+; RUN: opt -jump-threading -S %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [5 x i8] c"fail\00", align 1
+ at .str.1 = private unnamed_addr constant [5 x i8] c"pass\00", align 1
+
+define dso_local i32 @main() local_unnamed_addr {
+entry:
+  %a = alloca i64, align 8
+  store i64 0, i64* %a, align 8
+  %i = ptrtoint i64* %a to i64
+  %p = bitcast i64* %a to i8*
+  %0 = sub i64 0, %i
+  %q = getelementptr i8, i8* %p, i64 %0
+  %c = icmp eq i8* %q, null
+  ; `%a` is non-null at the end of the block, because we store through it.
+  ; However, `%q` is derived from `%a` via a GEP that is not `inbounds`, therefore we cannot judge `%q` is non-null as well
+  ; and must retain the `icmp` instruction.
+  ; CHECK: %c = icmp eq i8* %q, null
+  br i1 %c, label %if.else, label %if.then
+if.then:
+  %call0 = call i32 @puts(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0))
+  br label %if.end
+
+if.else:
+  %call1 = call i32 @puts(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0))
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+
+declare dso_local i32 @puts(i8*) local_unnamed_addr
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -4266,11 +4266,14 @@
   return true;
 }
 
-const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
+const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup,
+                                       bool InBounds) {
   if (!V->getType()->isPointerTy())
     return V;
   for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      if (InBounds && !GEP->isInBounds())
+        return V;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast ||
                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
Index: llvm/lib/Analysis/LazyValueInfo.cpp
===================================================================
--- llvm/lib/Analysis/LazyValueInfo.cpp
+++ llvm/lib/Analysis/LazyValueInfo.cpp
@@ -658,7 +658,7 @@
                            Val->getType()->getPointerAddressSpace()))
     return false;
 
-  Val = getUnderlyingObject(Val);
+  Val = getUnderlyingObjectInBounds(Val);
   return TheCache.isNonNullAtEndOfBlock(Val, BB, [](BasicBlock *BB) {
     NonNullPointerSet NonNullPointers;
     for (Instruction &I : *BB)
Index: llvm/include/llvm/Analysis/ValueTracking.h
===================================================================
--- llvm/include/llvm/Analysis/ValueTracking.h
+++ llvm/include/llvm/Analysis/ValueTracking.h
@@ -369,13 +369,18 @@
   /// the specified value, returning the original object being addressed. Note
   /// that the returned value has pointer type if the specified value does. If
   /// the MaxLookup value is non-zero, it limits the number of instructions to
-  /// be stripped off.
-  const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6);
+  /// be stripped off. If `InBounds` is true, then the method strips off a GEP
+  /// only if it has the `inbounds` attribute.
+  const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6,
+                                   bool InBounds = false);
   inline Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6) {
     // Force const to avoid infinite recursion.
     const Value *VConst = V;
     return const_cast<Value *>(getUnderlyingObject(VConst, MaxLookup));
   }
+  inline Value *getUnderlyingObjectInBounds(Value *V, unsigned MaxLookup = 6) {
+    return const_cast<Value *>(getUnderlyingObject(V, MaxLookup, true));
+  }
 
   /// This method is similar to getUnderlyingObject except that it can
   /// look through phi and select instructions and return multiple objects.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D99642.334378.patch
Type: text/x-patch
Size: 4364 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210331/4a7805d1/attachment.bin>


More information about the llvm-commits mailing list