[clang] 10264a1 - Introduce noundef attribute at call sites for stricter poison analysis

Gui Andrade via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 4 12:17:38 PST 2021


Author: Gui Andrade
Date: 2021-03-04T12:15:12-08:00
New Revision: 10264a1b21aebf75a8102116c9648c3386e8021e

URL: https://github.com/llvm/llvm-project/commit/10264a1b21aebf75a8102116c9648c3386e8021e
DIFF: https://github.com/llvm/llvm-project/commit/10264a1b21aebf75a8102116c9648c3386e8021e.diff

LOG: Introduce noundef attribute at call sites for stricter poison analysis

This change adds a new IR noundef attribute, which denotes when a function call argument or return val may never contain uninitialized bits.

In MemorySanitizer, this attribute enables optimizations which decrease instrumented code size by up to 17% (measured with an instrumented build of clang) . I'll introduce the change allowing msan to take advantage of this information in a separate patch.

Differential Revision: https://reviews.llvm.org/D81678

Added: 
    clang/test/CodeGen/attr-noundef.cpp
    clang/test/CodeGen/indirect-noundef.cpp

Modified: 
    clang/include/clang/Basic/CodeGenOptions.def
    clang/include/clang/Driver/Options.td
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/CodeGenFunction.cpp
    clang/lib/CodeGen/CodeGenModule.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 50503dbe4f84..a1db1b101620 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -64,6 +64,7 @@ CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
 CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0
 CODEGENOPT(ExperimentalStrictFloatingPoint, 1, 0) ///< Enables the new, experimental
                                                   ///< strict floating point.
+CODEGENOPT(EnableNoundefAttrs, 1, 0) ///< Enable emitting `noundef` attributes on IR call arguments and return values
 CODEGENOPT(LegacyPassManager, 1, 0) ///< Use the legacy pass manager.
 CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
                                    ///< pass manager.

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 583d08151e1a..d4d48deb649f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4994,6 +4994,9 @@ def code_completion_with_fixits : Flag<["-"], "code-completion-with-fixits">,
 def disable_free : Flag<["-"], "disable-free">,
   HelpText<"Disable freeing of memory on exit">,
   MarshallingInfoFlag<FrontendOpts<"DisableFree">>;
+def enable_noundef_analysis : Flag<["-"], "enable-noundef-analysis">, Group<f_Group>,
+  HelpText<"Enable analyzing function argument and return types for mandatory definedness">,
+  MarshallingInfoFlag<CodeGenOpts<"EnableNoundefAttrs">>;
 def discard_value_names : Flag<["-"], "discard-value-names">,
   HelpText<"Discard value names in LLVM IR">,
   MarshallingInfoFlag<CodeGenOpts<"DiscardValueNames">>;

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 4ea707621b33..f5411daaa677 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1732,6 +1732,18 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
     FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
 }
 
+bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
+                                          QualType ReturnType) {
+  // We can't just discard the return value for a record type with a
+  // complex destructor or a non-trivially copyable type.
+  if (const RecordType *RT =
+          ReturnType.getCanonicalType()->getAs<RecordType>()) {
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+      return ClassDecl->hasTrivialDestructor();
+  }
+  return ReturnType.isTriviallyCopyableType(Context);
+}
+
 void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
                                                  bool HasOptnone,
                                                  bool AttrOnCallSite,
@@ -1905,6 +1917,55 @@ static void addNoBuiltinAttributes(llvm::AttrBuilder &FuncAttrs,
   llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr);
 }
 
+static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
+                             const llvm::DataLayout &DL, const ABIArgInfo &AI,
+                             bool CheckCoerce = true) {
+  llvm::Type *Ty = Types.ConvertTypeForMem(QTy);
+  if (AI.getKind() == ABIArgInfo::Indirect)
+    return true;
+  if (AI.getKind() == ABIArgInfo::Extend)
+    return true;
+  if (!DL.typeSizeEqualsStoreSize(Ty))
+    // TODO: This will result in a modest amount of values not marked noundef
+    // when they could be. We care about values that *invisibly* contain undef
+    // bits from the perspective of LLVM IR.
+    return false;
+  if (CheckCoerce && AI.canHaveCoerceToType()) {
+    llvm::Type *CoerceTy = AI.getCoerceToType();
+    if (llvm::TypeSize::isKnownGT(DL.getTypeSizeInBits(CoerceTy),
+                                  DL.getTypeSizeInBits(Ty)))
+      // If we're coercing to a type with a greater size than the canonical one,
+      // we're introducing new undef bits.
+      // Coercing to a type of smaller or equal size is ok, as we know that
+      // there's no internal padding (typeSizeEqualsStoreSize).
+      return false;
+  }
+  if (QTy->isExtIntType())
+    return true;
+  if (QTy->isReferenceType())
+    return true;
+  if (QTy->isNullPtrType())
+    return false;
+  if (QTy->isMemberPointerType())
+    // TODO: Some member pointers are `noundef`, but it depends on the ABI. For
+    // now, never mark them.
+    return false;
+  if (QTy->isScalarType()) {
+    if (const ComplexType *Complex = dyn_cast<ComplexType>(QTy))
+      return DetermineNoUndef(Complex->getElementType(), Types, DL, AI, false);
+    return true;
+  }
+  if (const VectorType *Vector = dyn_cast<VectorType>(QTy))
+    return DetermineNoUndef(Vector->getElementType(), Types, DL, AI, false);
+  if (const MatrixType *Matrix = dyn_cast<MatrixType>(QTy))
+    return DetermineNoUndef(Matrix->getElementType(), Types, DL, AI, false);
+  if (const ArrayType *Array = dyn_cast<ArrayType>(QTy))
+    return DetermineNoUndef(Array->getElementType(), Types, DL, AI, false);
+
+  // TODO: Some structs may be `noundef`, in specific situations.
+  return false;
+}
+
 /// Construct the IR attribute list of a function or call.
 ///
 /// When adding an attribute, please consider where it should be handled:
@@ -2129,6 +2190,34 @@ void CodeGenModule::ConstructAttributeList(
 
   QualType RetTy = FI.getReturnType();
   const ABIArgInfo &RetAI = FI.getReturnInfo();
+  const llvm::DataLayout &DL = getDataLayout();
+
+  // C++ explicitly makes returning undefined values UB. C's rule only applies
+  // to used values, so we never mark them noundef for now.
+  bool HasStrictReturn = getLangOpts().CPlusPlus;
+  if (TargetDecl) {
+    if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl))
+      HasStrictReturn &= !FDecl->isExternC();
+    else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl))
+      // Function pointer
+      HasStrictReturn &= !VDecl->isExternC();
+  }
+
+  // We don't want to be too aggressive with the return checking, unless
+  // it's explicit in the code opts or we're using an appropriate sanitizer.
+  // Try to respect what the programmer intended.
+  HasStrictReturn &= getCodeGenOpts().StrictReturn ||
+                     !MayDropFunctionReturn(getContext(), RetTy) ||
+                     getLangOpts().Sanitize.has(SanitizerKind::Memory) ||
+                     getLangOpts().Sanitize.has(SanitizerKind::Return);
+
+  // Determine if the return type could be partially undef
+  if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
+    if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
+        DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
+      RetAttrs.addAttribute(llvm::Attribute::NoUndef);
+  }
+
   switch (RetAI.getKind()) {
   case ABIArgInfo::Extend:
     if (RetAI.isSignExt())
@@ -2245,6 +2334,11 @@ void CodeGenModule::ConstructAttributeList(
       }
     }
 
+    // Decide whether the argument we're handling could be partially undef
+    bool ArgNoUndef = DetermineNoUndef(ParamType, getTypes(), DL, AI);
+    if (CodeGenOpts.EnableNoundefAttrs && ArgNoUndef)
+      Attrs.addAttribute(llvm::Attribute::NoUndef);
+
     // 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
     // have the corresponding parameter variable.  It doesn't make
     // sense to do it here because parameters are so messed up.

diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ef7f9fb25e7f..6d95adcb6ef0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1273,19 +1273,6 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
   return ResTy;
 }
 
-static bool
-shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
-                                             const ASTContext &Context) {
-  QualType T = FD->getReturnType();
-  // Avoid the optimization for functions that return a record type with a
-  // trivial destructor or another trivially copyable type.
-  if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
-    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
-      return !ClassDecl->hasTrivialDestructor();
-  }
-  return !T.isTriviallyCopyableType(Context);
-}
-
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1366,7 +1353,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
       !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
     bool ShouldEmitUnreachable =
         CGM.getCodeGenOpts().StrictReturn ||
-        shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
+        !CGM.MayDropFunctionReturn(FD->getASTContext(), FD->getReturnType());
     if (SanOpts.has(SanitizerKind::Return)) {
       SanitizerScope SanScope(this);
       llvm::Value *IsFalse = Builder.getFalse();

diff  --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d495a169b609..a22438382dbd 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1374,6 +1374,10 @@ class CodeGenModule : public CodeGenTypeCache {
   void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
                                           llvm::Function *F);
 
+  /// Whether this function's return type has no side effects, and thus may
+  /// be trivially discarded if it is unused.
+  bool MayDropFunctionReturn(const ASTContext &Context, QualType ReturnType);
+
   /// Returns whether this module needs the "all-vtables" type identifier.
   bool NeedAllVtablesTypeId() const;
 

diff  --git a/clang/test/CodeGen/attr-noundef.cpp b/clang/test/CodeGen/attr-noundef.cpp
new file mode 100644
index 000000000000..0f05795adf4b
--- /dev/null
+++ b/clang/test/CodeGen/attr-noundef.cpp
@@ -0,0 +1,162 @@
+// RUN: %clang -cc1 -triple x86_64-gnu-linux -x c++ -S -emit-llvm -enable-noundef-analysis %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-INTEL
+// RUN: %clang -cc1 -triple aarch64-gnu-linux -x c++ -S -emit-llvm -enable-noundef-analysis %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH
+
+//************ Passing structs by value
+// TODO: No structs may currently be marked noundef
+
+namespace check_structs {
+struct Trivial {
+  int a;
+};
+Trivial ret_trivial() { return {}; }
+void pass_trivial(Trivial e) {}
+// CHECK-INTEL: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i64 @{{.*}}ret_trivial
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
+
+struct NoCopy {
+  int a;
+  NoCopy(NoCopy &) = delete;
+};
+NoCopy ret_nocopy() { return {}; }
+void pass_nocopy(NoCopy e) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_nocopy{{.*}}(%"struct.check_structs::NoCopy"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_nocopy{{.*}}(%"struct.check_structs::NoCopy"* noundef %
+
+struct Huge {
+  int a[1024];
+};
+Huge ret_huge() { return {}; }
+void pass_huge(Huge h) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_huge{{.*}}(%"struct.check_structs::Huge"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_huge{{.*}}(%"struct.check_structs::Huge"* noundef
+} // namespace check_structs
+
+//************ Passing unions by value
+// No unions may be marked noundef
+
+namespace check_unions {
+union Trivial {
+  int a;
+};
+Trivial ret_trivial() { return {}; }
+void pass_trivial(Trivial e) {}
+// CHECK-INTEL: [[DEFINE]] i32 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE]] i64 @{{.*}}ret_trivial
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
+
+union NoCopy {
+  int a;
+  NoCopy(NoCopy &) = delete;
+};
+NoCopy ret_nocopy() { return {}; }
+void pass_nocopy(NoCopy e) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_nocopy{{.*}}(%"union.check_unions::NoCopy"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_nocopy{{.*}}(%"union.check_unions::NoCopy"* noundef %
+} // namespace check_unions
+
+//************ Passing `this` pointers
+// `this` pointer must always be defined
+
+namespace check_this {
+struct Object {
+  int data[];
+
+  Object() {
+    this->data[0] = 0;
+  }
+  int getData() {
+    return this->data[0];
+  }
+  Object *getThis() {
+    return this;
+  }
+};
+
+void use_object() {
+  Object obj;
+  obj.getData();
+  obj.getThis();
+}
+// CHECK: define linkonce_odr void @{{.*}}Object{{.*}}(%"struct.check_this::Object"* noundef %
+// CHECK: define linkonce_odr noundef i32 @{{.*}}Object{{.*}}getData{{.*}}(%"struct.check_this::Object"* noundef %
+// CHECK: define linkonce_odr noundef %"struct.check_this::Object"* @{{.*}}Object{{.*}}getThis{{.*}}(%"struct.check_this::Object"* noundef %
+} // namespace check_this
+
+//************ Passing vector types
+
+namespace check_vecs {
+typedef int __attribute__((vector_size(12))) i32x3;
+i32x3 ret_vec() {
+  return {};
+}
+void pass_vec(i32x3 v) {
+}
+
+// CHECK: [[DEFINE]] noundef <3 x i32> @{{.*}}ret_vec{{.*}}()
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_vec{{.*}}(<3 x i32> noundef %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_vec{{.*}}(<4 x i32> %
+} // namespace check_vecs
+
+//************ Passing exotic types
+// Function/Array pointers, Function member / Data member pointers, nullptr_t, ExtInt types
+
+namespace check_exotic {
+struct Object {
+  int mfunc();
+  int mdata;
+};
+typedef int Object::*mdptr;
+typedef int (Object::*mfptr)();
+typedef decltype(nullptr) nullptr_t;
+typedef int (*arrptr)[32];
+typedef int (*fnptr)(int);
+
+arrptr ret_arrptr() {
+  return nullptr;
+}
+fnptr ret_fnptr() {
+  return nullptr;
+}
+mdptr ret_mdptr() {
+  return nullptr;
+}
+mfptr ret_mfptr() {
+  return nullptr;
+}
+nullptr_t ret_npt() {
+  return nullptr;
+}
+void pass_npt(nullptr_t t) {
+}
+_ExtInt(3) ret_extint() {
+  return 0;
+}
+void pass_extint(_ExtInt(3) e) {
+}
+void pass_large_extint(_ExtInt(127) e) {
+}
+
+// Pointers to arrays/functions are always noundef
+// CHECK: [[DEFINE]] noundef [32 x i32]* @{{.*}}ret_arrptr{{.*}}()
+// CHECK: [[DEFINE]] noundef i32 (i32)* @{{.*}}ret_fnptr{{.*}}()
+
+// Pointers to members are never noundef
+// CHECK: [[DEFINE]] i64 @{{.*}}ret_mdptr{{.*}}()
+// CHECK-INTEL: [[DEFINE]] { i64, i64 } @{{.*}}ret_mfptr{{.*}}()
+// CHECK-AARCH: [[DEFINE]] [2 x i64] @{{.*}}ret_mfptr{{.*}}()
+
+// nullptr_t is never noundef
+// CHECK: [[DEFINE]] i8* @{{.*}}ret_npt{{.*}}()
+// CHECK: [[DEFINE]] void @{{.*}}pass_npt{{.*}}(i8* %
+
+// TODO: for now, ExtInt is only noundef if it is sign/zero-extended
+// CHECK-INTEL: [[DEFINE]] noundef signext i3 @{{.*}}ret_extint{{.*}}()
+// CHECK-AARCH: [[DEFINE]] i3 @{{.*}}ret_extint{{.*}}()
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_extint{{.*}}(i3 noundef signext %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_extint{{.*}}(i3 %
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_large_extint{{.*}}(i64 %{{.*}}, i64 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_large_extint{{.*}}(i127 %
+} // namespace check_exotic

diff  --git a/clang/test/CodeGen/indirect-noundef.cpp b/clang/test/CodeGen/indirect-noundef.cpp
new file mode 100644
index 000000000000..6eccf8c6fb3a
--- /dev/null
+++ b/clang/test/CodeGen/indirect-noundef.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang -cc1 -x c++ -triple x86_64-unknown-unknown -O0 -emit-llvm -enable-noundef-analysis -o - %s | FileCheck %s
+
+union u1 {
+  int val;
+};
+
+// CHECK: @indirect_callee_int_ptr = [[GLOBAL:(dso_local )?global]] i32 (i32)*
+int (*indirect_callee_int_ptr)(int);
+// CHECK: @indirect_callee_union_ptr = [[GLOBAL]] i32 (i32)*
+union u1 (*indirect_callee_union_ptr)(union u1);
+
+// CHECK: [[DEFINE:define( dso_local)?]] noundef i32 @{{.*}}indirect_callee_int{{.*}}(i32 noundef %
+int indirect_callee_int(int a) { return a; }
+// CHECK: [[DEFINE]] i32 @{{.*}}indirect_callee_union{{.*}}(i32 %
+union u1 indirect_callee_union(union u1 a) {
+  return a;
+}
+
+int main() {
+  // CHECK: call noundef i32 @{{.*}}indirect_callee_int{{.*}}(i32 noundef 0)
+  indirect_callee_int(0);
+  // CHECK: call i32 @{{.*}}indirect_callee_union{{.*}}(i32 %
+  indirect_callee_union((union u1){0});
+
+  indirect_callee_int_ptr = indirect_callee_int;
+  indirect_callee_union_ptr = indirect_callee_union;
+
+  // CHECK: call noundef i32 %{{.*}}(i32 noundef 0)
+  indirect_callee_int_ptr(0);
+  // CHECK: call i32 %{{.*}}(i32 %
+  indirect_callee_union_ptr((union u1){});
+
+  return 0;
+}


        


More information about the cfe-commits mailing list