[llvm] [llvm] Use LazyValueInfo to improve llvm.objectsize computation (PR #114673)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 5 07:35:28 PST 2024


https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/114673

>From 95739c6f2d1b3216651431bf49ba60112710c8a1 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Sat, 2 Nov 2024 10:02:22 +0100
Subject: [PATCH] [llvm] Use computeConstantRange to improve llvm.objectsize
 computation

Using computeConstantRange, it is possible to compute valuable information for
allocation functions, GEP and alloca, even in the presence of some dynamic
information.

llvm.objectsize plays an important role in _FORTIFY_SOURCE definitions,
so improving its diagnostic in turns improves the security of compiled
application.

As a side note, as a result of recent optimization improvements, clang no
longer passes https://github.com/serge-sans-paille/builtin_object_size-test-suite
This commit restores the situation and greatly improves the scope of
code handled by the static version of __builtin_object_size.
---
 llvm/include/llvm/Analysis/MemoryBuiltins.h   |  12 ++-
 llvm/lib/Analysis/MemoryBuiltins.cpp          | 102 +++++++++++++++++-
 .../InstCombine/InstructionCombining.cpp      |   5 +-
 .../Scalar/LowerConstantIntrinsics.cpp        |   2 +-
 .../builtin-object-size-range.ll              |  76 +++++++++++++
 5 files changed, 190 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll

diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index c3b11cdf5cf5db..9c52896ead4f69 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -32,6 +32,7 @@ class AAResults;
 class Argument;
 class ConstantPointerNull;
 class DataLayout;
+class DominatorTree;
 class ExtractElementInst;
 class ExtractValueInst;
 class GEPOperator;
@@ -160,8 +161,10 @@ struct ObjectSizeOpts {
   /// though they can't be evaluated. Otherwise, null is always considered to
   /// point to a 0 byte region of memory.
   bool NullIsUnknownSize = false;
-  /// If set, used for more accurate evaluation
+  /// If set, used for more accurate evaluation.
   AAResults *AA = nullptr;
+  /// If set, used for more accurate evaluation.
+  DominatorTree *DT = nullptr;
 };
 
 /// Compute the size of the object pointed by Ptr. Returns true and the
@@ -186,6 +189,12 @@ Value *lowerObjectSizeCall(
     const TargetLibraryInfo *TLI, AAResults *AA, bool MustSucceed,
     SmallVectorImpl<Instruction *> *InsertedInstructions = nullptr);
 
+Value *lowerObjectSizeCall(
+    IntrinsicInst *ObjectSize, const DataLayout &DL,
+    const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
+    bool MustSucceed,
+    SmallVectorImpl<Instruction *> *InsertedInstructions = nullptr);
+
 /// SizeOffsetType - A base template class for the object size visitors. Used
 /// here as a self-documenting way to handle the values rather than using a
 /// \p std::pair.
@@ -275,6 +284,7 @@ class ObjectSizeOffsetVisitor
   OffsetSpan visitExtractValueInst(ExtractValueInst &I);
   OffsetSpan visitGlobalAlias(GlobalAlias &GA);
   OffsetSpan visitGlobalVariable(GlobalVariable &GV);
+  OffsetSpan visitGetElementPtr(GetElementPtrInst &GEP);
   OffsetSpan visitIntToPtrInst(IntToPtrInst &);
   OffsetSpan visitLoadInst(LoadInst &I);
   OffsetSpan visitPHINode(PHINode &);
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 71400ac46bdcbf..94bf41a576ca40 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -590,19 +591,28 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
                                  const TargetLibraryInfo *TLI,
                                  bool MustSucceed) {
   return lowerObjectSizeCall(ObjectSize, DL, TLI, /*AAResults=*/nullptr,
-                             MustSucceed);
+                             /*DT=*/nullptr, MustSucceed);
 }
 
 Value *llvm::lowerObjectSizeCall(
     IntrinsicInst *ObjectSize, const DataLayout &DL,
     const TargetLibraryInfo *TLI, AAResults *AA, bool MustSucceed,
     SmallVectorImpl<Instruction *> *InsertedInstructions) {
+  return lowerObjectSizeCall(ObjectSize, DL, TLI, AA, /*DT=*/nullptr,
+                             MustSucceed, InsertedInstructions);
+}
+
+Value *llvm::lowerObjectSizeCall(
+    IntrinsicInst *ObjectSize, const DataLayout &DL,
+    const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
+    bool MustSucceed, SmallVectorImpl<Instruction *> *InsertedInstructions) {
   assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
          "ObjectSize must be a call to llvm.objectsize!");
 
   bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
   ObjectSizeOpts EvalOptions;
   EvalOptions.AA = AA;
+  EvalOptions.DT = DT;
 
   // Unless we have to fold this to something, try to be as accurate as
   // possible.
@@ -716,7 +726,6 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) {
   // value that is passed to computeImpl.
   IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
   Zero = APInt::getZero(IntTyBits);
-
   OffsetSpan ORT = computeValue(V);
 
   bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits;
@@ -794,6 +803,26 @@ OffsetSpan ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
     Size = Size.umul_ov(NumElems, Overflow);
     return Overflow ? ObjectSizeOffsetVisitor::unknown()
                     : OffsetSpan(Zero, align(Size, I.getAlign()));
+  } else {
+    ConstantRange CR =
+        computeConstantRange(ArraySize, /*ForSigned*/ false,
+                             /*UseInstrInfo*/ true, /*AC=*/nullptr,
+                             /*CtxtI=*/&I, /*DT=*/Options.DT);
+    if (CR.isFullSet())
+      return ObjectSizeOffsetVisitor::unknown();
+    APInt Bound;
+    if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
+      Bound = CR.getUnsignedMax();
+      // Upper bound actually unknown.
+      if (Bound.isMaxValue())
+        return ObjectSizeOffsetVisitor::unknown();
+    } else {
+      Bound = CR.getUnsignedMin();
+      // Lower bound actually unknown.
+      if (Bound.isMinValue())
+        return ObjectSizeOffsetVisitor::unknown();
+    }
+    return OffsetSpan(Zero, align(Bound, I.getAlign()));
   }
   return ObjectSizeOffsetVisitor::unknown();
 }
@@ -811,7 +840,32 @@ OffsetSpan ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
 }
 
 OffsetSpan ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
-  if (std::optional<APInt> Size = getAllocSize(&CB, TLI))
+  if (std::optional<APInt> Size =
+          getAllocSize(&CB, TLI, [&CB, this](const Value *V) -> const Value * {
+            if (!V->getType()->isIntegerTy())
+              return V;
+            if (isa<ConstantInt>(V))
+              return V;
+            ConstantRange CR = computeConstantRange(
+                V, /*ForSigned*/ false, /*UseInstrInfo*/ true, /*AC=*/nullptr,
+                /*CtxtI=*/&CB, /*DT=*/Options.DT);
+            if (CR.isFullSet())
+              return V;
+
+            APInt Bound;
+            if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
+              Bound = CR.getUnsignedMax();
+              // Upper bound actually unknown.
+              if (Bound.isMaxValue())
+                return V;
+            } else {
+              Bound = CR.getUnsignedMin();
+              // Lower bound actually unknown.
+              if (Bound.isMinValue())
+                return V;
+            }
+            return ConstantInt::get(V->getType(), Bound);
+          }))
     return OffsetSpan(Zero, *Size);
   return ObjectSizeOffsetVisitor::unknown();
 }
@@ -856,6 +910,48 @@ OffsetSpan ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV) {
   return OffsetSpan(Zero, align(Size, GV.getAlign()));
 }
 
+OffsetSpan ObjectSizeOffsetVisitor::visitGetElementPtr(GetElementPtrInst &GEP) {
+  OffsetSpan PtrData = computeImpl(GEP.getPointerOperand());
+  if (!PtrData.bothKnown())
+    return ObjectSizeOffsetVisitor::unknown();
+
+  if (Options.EvalMode == ObjectSizeOpts::Mode::Min ||
+      Options.EvalMode == ObjectSizeOpts::Mode::Max) {
+    unsigned BitWidth = PtrData.After.getBitWidth();
+    APInt ConstantOffset = Zero;
+    SmallMapVector<Value *, APInt, 4> VariableOffsets;
+    if (!GEP.collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
+      return ObjectSizeOffsetVisitor::unknown();
+
+    ConstantRange AccumulatedRange = ConstantOffset;
+    for (auto const &VO : VariableOffsets) {
+      ConstantRange CR = computeConstantRange(
+          VO.first, /*ForSigned*/ true, /*UseInstrInfo*/ true, /*AC=*/nullptr,
+          /*CtxtI=*/&GEP, /*DT=*/Options.DT);
+      if (CR.isFullSet())
+        return ObjectSizeOffsetVisitor::unknown();
+
+      AccumulatedRange = AccumulatedRange.add(CR.multiply(VO.second));
+    }
+
+    APInt Bound;
+    if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
+      Bound = AccumulatedRange.getSignedMax();
+      // Upper bound actually unknown.
+      if (Bound.isMaxSignedValue())
+        return ObjectSizeOffsetVisitor::unknown();
+    } else {
+      Bound = AccumulatedRange.getSignedMin();
+      // Lower bound actually unknown.
+      if (Bound.isMinSignedValue())
+        return ObjectSizeOffsetVisitor::unknown();
+    }
+
+    return {PtrData.Before + Bound, PtrData.After - Bound};
+  }
+  return ObjectSizeOffsetVisitor::unknown();
+}
+
 OffsetSpan ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst &) {
   // clueless
   return ObjectSizeOffsetVisitor::unknown();
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 2a54390c0f1882..2ced0c80b1a8fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3317,8 +3317,9 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
         if (II->getIntrinsicID() == Intrinsic::objectsize) {
           SmallVector<Instruction *> InsertedInstructions;
-          Value *Result = lowerObjectSizeCall(
-              II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
+          Value *Result =
+              lowerObjectSizeCall(II, DL, &TLI, AA, &DT,
+                                  /*MustSucceed=*/true, &InsertedInstructions);
           for (Instruction *Inserted : InsertedInstructions)
             Worklist.add(Inserted);
           replaceInstUsesWith(*I, Result);
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index dcbec927e55703..1b164042502343 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -143,7 +143,7 @@ bool llvm::lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
       IsConstantIntrinsicsHandled++;
       break;
     case Intrinsic::objectsize:
-      NewValue = lowerObjectSizeCall(II, DL, &TLI, true);
+      NewValue = lowerObjectSizeCall(II, DL, &TLI, /*AA=*/nullptr, DT, true);
       LLVM_DEBUG(dbgs() << "Folding " << *II << " to " << *NewValue << "\n");
       ObjectSizeIntrinsicsHandled++;
       break;
diff --git a/llvm/test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll b/llvm/test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll
new file mode 100644
index 00000000000000..0017f8657193e9
--- /dev/null
+++ b/llvm/test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=lower-constant-intrinsics -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i64 @llvm.objectsize.i64.p0(ptr, i1 immarg, i1 immarg, i1 immarg)
+declare noalias ptr @malloc(i64 noundef) #0
+
+define i64 @select_alloc_size(i1 %cond) {
+; CHECK-LABEL: @select_alloc_size(
+; CHECK-NEXT:    [[SIZE:%.*]] = select i1 [[COND:%.*]], i64 3, i64 4
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i64 4, i64 3
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+  %size = select i1 %cond, i64 3, i64 4
+  %ptr = alloca i8, i64 %size
+  %objsize_max = call i64 @llvm.objectsize.i64.p0(ptr %ptr, i1 false, i1 true, i1 false)
+  %objsize_min = call i64 @llvm.objectsize.i64.p0(ptr %ptr, i1 true, i1 true, i1 false)
+  %res = select i1 %cond, i64 %objsize_max, i64 %objsize_min
+  ret i64 %res
+}
+
+define i64 @select_malloc_size(i1 %cond) {
+; CHECK-LABEL: @select_malloc_size(
+; CHECK-NEXT:    [[SIZE:%.*]] = select i1 [[COND:%.*]], i64 3, i64 4
+; CHECK-NEXT:    [[PTR:%.*]] = call noalias ptr @malloc(i64 noundef [[SIZE]])
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i64 4, i64 3
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+  %size = select i1 %cond, i64 3, i64 4
+  %ptr = call noalias ptr @malloc(i64 noundef %size)
+  %objsize_max = call i64 @llvm.objectsize.i64.p0(ptr %ptr, i1 false, i1 true, i1 false)
+  %objsize_min = call i64 @llvm.objectsize.i64.p0(ptr %ptr, i1 true, i1 true, i1 false)
+  %res = select i1 %cond, i64 %objsize_max, i64 %objsize_min
+  ret i64 %res
+}
+
+define i64 @select_gep_offset(i1 %cond) {
+; CHECK-LABEL: @select_gep_offset(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, i64 10, align 1
+; CHECK-NEXT:    [[OFFSET:%.*]] = select i1 [[COND:%.*]], i64 3, i64 4
+; CHECK-NEXT:    [[PTR_SLIDE:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[OFFSET]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i64 7, i64 6
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+  %ptr = alloca i8, i64 10
+  %offset = select i1 %cond, i64 3, i64 4
+  %ptr.slide = getelementptr inbounds i8, ptr %ptr, i64 %offset
+  %objsize_max = call i64 @llvm.objectsize.i64.p0(ptr %ptr.slide, i1 false, i1 true, i1 false)
+  %objsize_min = call i64 @llvm.objectsize.i64.p0(ptr %ptr.slide, i1 true, i1 true, i1 false)
+  %res = select i1 %cond, i64 %objsize_max, i64 %objsize_min
+  ret i64 %res
+}
+
+define i64 @select_gep_neg_offset(i1 %cond) {
+; CHECK-LABEL: @select_gep_neg_offset(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, i64 10, align 1
+; CHECK-NEXT:    [[PTR_SLIDE_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; CHECK-NEXT:    [[OFFSET:%.*]] = select i1 [[COND:%.*]], i64 -3, i64 -4
+; CHECK-NEXT:    [[PTR_SLIDE_2:%.*]] = getelementptr inbounds i8, ptr [[PTR_SLIDE_1]], i64 [[OFFSET]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i64 9, i64 8
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+  %ptr = alloca i8, i64 10
+  %ptr.slide.1 = getelementptr inbounds i8, ptr %ptr, i64 5
+  %offset = select i1 %cond, i64 -3, i64 -4
+  %ptr.slide.2 = getelementptr inbounds i8, ptr %ptr.slide.1, i64 %offset
+  %objsize_max = call i64 @llvm.objectsize.i64.p0(ptr %ptr.slide.2, i1 false, i1 true, i1 false)
+  %objsize_min = call i64 @llvm.objectsize.i64.p0(ptr %ptr.slide.2, i1 true, i1 true, i1 false)
+  %res = select i1 %cond, i64 %objsize_max, i64 %objsize_min
+  ret i64 %res
+}
+
+attributes #0 = { nounwind allocsize(0) }



More information about the llvm-commits mailing list