[clang] 10264a1 - Introduce noundef attribute at call sites for stricter poison analysis
Gui Andrade via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 4 12:17:38 PST 2021
Author: Gui Andrade
Date: 2021-03-04T12:15:12-08:00
New Revision: 10264a1b21aebf75a8102116c9648c3386e8021e
URL: https://github.com/llvm/llvm-project/commit/10264a1b21aebf75a8102116c9648c3386e8021e
DIFF: https://github.com/llvm/llvm-project/commit/10264a1b21aebf75a8102116c9648c3386e8021e.diff
LOG: Introduce noundef attribute at call sites for stricter poison analysis
This change adds a new IR noundef attribute, which denotes when a function call argument or return val may never contain uninitialized bits.
In MemorySanitizer, this attribute enables optimizations which decrease instrumented code size by up to 17% (measured with an instrumented build of clang) . I'll introduce the change allowing msan to take advantage of this information in a separate patch.
Differential Revision: https://reviews.llvm.org/D81678
Added:
clang/test/CodeGen/attr-noundef.cpp
clang/test/CodeGen/indirect-noundef.cpp
Modified:
clang/include/clang/Basic/CodeGenOptions.def
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/CodeGen/CodeGenModule.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 50503dbe4f84..a1db1b101620 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -64,6 +64,7 @@ CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0
CODEGENOPT(ExperimentalStrictFloatingPoint, 1, 0) ///< Enables the new, experimental
///< strict floating point.
+CODEGENOPT(EnableNoundefAttrs, 1, 0) ///< Enable emitting `noundef` attributes on IR call arguments and return values
CODEGENOPT(LegacyPassManager, 1, 0) ///< Use the legacy pass manager.
CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
///< pass manager.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 583d08151e1a..d4d48deb649f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4994,6 +4994,9 @@ def code_completion_with_fixits : Flag<["-"], "code-completion-with-fixits">,
def disable_free : Flag<["-"], "disable-free">,
HelpText<"Disable freeing of memory on exit">,
MarshallingInfoFlag<FrontendOpts<"DisableFree">>;
+def enable_noundef_analysis : Flag<["-"], "enable-noundef-analysis">, Group<f_Group>,
+ HelpText<"Enable analyzing function argument and return types for mandatory definedness">,
+ MarshallingInfoFlag<CodeGenOpts<"EnableNoundefAttrs">>;
def discard_value_names : Flag<["-"], "discard-value-names">,
HelpText<"Discard value names in LLVM IR">,
MarshallingInfoFlag<CodeGenOpts<"DiscardValueNames">>;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 4ea707621b33..f5411daaa677 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1732,6 +1732,18 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
+bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
+ QualType ReturnType) {
+ // We can't just discard the return value for a record type with a
+ // complex destructor or a non-trivially copyable type.
+ if (const RecordType *RT =
+ ReturnType.getCanonicalType()->getAs<RecordType>()) {
+ if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+ return ClassDecl->hasTrivialDestructor();
+ }
+ return ReturnType.isTriviallyCopyableType(Context);
+}
+
void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
bool HasOptnone,
bool AttrOnCallSite,
@@ -1905,6 +1917,55 @@ static void addNoBuiltinAttributes(llvm::AttrBuilder &FuncAttrs,
llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr);
}
+static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
+ const llvm::DataLayout &DL, const ABIArgInfo &AI,
+ bool CheckCoerce = true) {
+ llvm::Type *Ty = Types.ConvertTypeForMem(QTy);
+ if (AI.getKind() == ABIArgInfo::Indirect)
+ return true;
+ if (AI.getKind() == ABIArgInfo::Extend)
+ return true;
+ if (!DL.typeSizeEqualsStoreSize(Ty))
+ // TODO: This will result in a modest amount of values not marked noundef
+ // when they could be. We care about values that *invisibly* contain undef
+ // bits from the perspective of LLVM IR.
+ return false;
+ if (CheckCoerce && AI.canHaveCoerceToType()) {
+ llvm::Type *CoerceTy = AI.getCoerceToType();
+ if (llvm::TypeSize::isKnownGT(DL.getTypeSizeInBits(CoerceTy),
+ DL.getTypeSizeInBits(Ty)))
+ // If we're coercing to a type with a greater size than the canonical one,
+ // we're introducing new undef bits.
+ // Coercing to a type of smaller or equal size is ok, as we know that
+ // there's no internal padding (typeSizeEqualsStoreSize).
+ return false;
+ }
+ if (QTy->isExtIntType())
+ return true;
+ if (QTy->isReferenceType())
+ return true;
+ if (QTy->isNullPtrType())
+ return false;
+ if (QTy->isMemberPointerType())
+ // TODO: Some member pointers are `noundef`, but it depends on the ABI. For
+ // now, never mark them.
+ return false;
+ if (QTy->isScalarType()) {
+ if (const ComplexType *Complex = dyn_cast<ComplexType>(QTy))
+ return DetermineNoUndef(Complex->getElementType(), Types, DL, AI, false);
+ return true;
+ }
+ if (const VectorType *Vector = dyn_cast<VectorType>(QTy))
+ return DetermineNoUndef(Vector->getElementType(), Types, DL, AI, false);
+ if (const MatrixType *Matrix = dyn_cast<MatrixType>(QTy))
+ return DetermineNoUndef(Matrix->getElementType(), Types, DL, AI, false);
+ if (const ArrayType *Array = dyn_cast<ArrayType>(QTy))
+ return DetermineNoUndef(Array->getElementType(), Types, DL, AI, false);
+
+ // TODO: Some structs may be `noundef`, in specific situations.
+ return false;
+}
+
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
@@ -2129,6 +2190,34 @@ void CodeGenModule::ConstructAttributeList(
QualType RetTy = FI.getReturnType();
const ABIArgInfo &RetAI = FI.getReturnInfo();
+ const llvm::DataLayout &DL = getDataLayout();
+
+ // C++ explicitly makes returning undefined values UB. C's rule only applies
+ // to used values, so we never mark them noundef for now.
+ bool HasStrictReturn = getLangOpts().CPlusPlus;
+ if (TargetDecl) {
+ if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl))
+ HasStrictReturn &= !FDecl->isExternC();
+ else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl))
+ // Function pointer
+ HasStrictReturn &= !VDecl->isExternC();
+ }
+
+ // We don't want to be too aggressive with the return checking, unless
+ // it's explicit in the code opts or we're using an appropriate sanitizer.
+ // Try to respect what the programmer intended.
+ HasStrictReturn &= getCodeGenOpts().StrictReturn ||
+ !MayDropFunctionReturn(getContext(), RetTy) ||
+ getLangOpts().Sanitize.has(SanitizerKind::Memory) ||
+ getLangOpts().Sanitize.has(SanitizerKind::Return);
+
+ // Determine if the return type could be partially undef
+ if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
+ if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
+ DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
+ RetAttrs.addAttribute(llvm::Attribute::NoUndef);
+ }
+
switch (RetAI.getKind()) {
case ABIArgInfo::Extend:
if (RetAI.isSignExt())
@@ -2245,6 +2334,11 @@ void CodeGenModule::ConstructAttributeList(
}
}
+ // Decide whether the argument we're handling could be partially undef
+ bool ArgNoUndef = DetermineNoUndef(ParamType, getTypes(), DL, AI);
+ if (CodeGenOpts.EnableNoundefAttrs && ArgNoUndef)
+ Attrs.addAttribute(llvm::Attribute::NoUndef);
+
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
// have the corresponding parameter variable. It doesn't make
// sense to do it here because parameters are so messed up.
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ef7f9fb25e7f..6d95adcb6ef0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1273,19 +1273,6 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
return ResTy;
}
-static bool
-shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
- const ASTContext &Context) {
- QualType T = FD->getReturnType();
- // Avoid the optimization for functions that return a record type with a
- // trivial destructor or another trivially copyable type.
- if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
- if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
- return !ClassDecl->hasTrivialDestructor();
- }
- return !T.isTriviallyCopyableType(Context);
-}
-
void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1366,7 +1353,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
!FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
bool ShouldEmitUnreachable =
CGM.getCodeGenOpts().StrictReturn ||
- shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
+ !CGM.MayDropFunctionReturn(FD->getASTContext(), FD->getReturnType());
if (SanOpts.has(SanitizerKind::Return)) {
SanitizerScope SanScope(this);
llvm::Value *IsFalse = Builder.getFalse();
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index d495a169b609..a22438382dbd 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1374,6 +1374,10 @@ class CodeGenModule : public CodeGenTypeCache {
void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F);
+ /// Whether this function's return type has no side effects, and thus may
+ /// be trivially discarded if it is unused.
+ bool MayDropFunctionReturn(const ASTContext &Context, QualType ReturnType);
+
/// Returns whether this module needs the "all-vtables" type identifier.
bool NeedAllVtablesTypeId() const;
diff --git a/clang/test/CodeGen/attr-noundef.cpp b/clang/test/CodeGen/attr-noundef.cpp
new file mode 100644
index 000000000000..0f05795adf4b
--- /dev/null
+++ b/clang/test/CodeGen/attr-noundef.cpp
@@ -0,0 +1,162 @@
+// RUN: %clang -cc1 -triple x86_64-gnu-linux -x c++ -S -emit-llvm -enable-noundef-analysis %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-INTEL
+// RUN: %clang -cc1 -triple aarch64-gnu-linux -x c++ -S -emit-llvm -enable-noundef-analysis %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH
+
+//************ Passing structs by value
+// TODO: No structs may currently be marked noundef
+
+namespace check_structs {
+struct Trivial {
+ int a;
+};
+Trivial ret_trivial() { return {}; }
+void pass_trivial(Trivial e) {}
+// CHECK-INTEL: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i64 @{{.*}}ret_trivial
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
+
+struct NoCopy {
+ int a;
+ NoCopy(NoCopy &) = delete;
+};
+NoCopy ret_nocopy() { return {}; }
+void pass_nocopy(NoCopy e) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_nocopy{{.*}}(%"struct.check_structs::NoCopy"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_nocopy{{.*}}(%"struct.check_structs::NoCopy"* noundef %
+
+struct Huge {
+ int a[1024];
+};
+Huge ret_huge() { return {}; }
+void pass_huge(Huge h) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_huge{{.*}}(%"struct.check_structs::Huge"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_huge{{.*}}(%"struct.check_structs::Huge"* noundef
+} // namespace check_structs
+
+//************ Passing unions by value
+// No unions may be marked noundef
+
+namespace check_unions {
+union Trivial {
+ int a;
+};
+Trivial ret_trivial() { return {}; }
+void pass_trivial(Trivial e) {}
+// CHECK-INTEL: [[DEFINE]] i32 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE]] i64 @{{.*}}ret_trivial
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
+
+union NoCopy {
+ int a;
+ NoCopy(NoCopy &) = delete;
+};
+NoCopy ret_nocopy() { return {}; }
+void pass_nocopy(NoCopy e) {}
+// CHECK: [[DEFINE]] void @{{.*}}ret_nocopy{{.*}}(%"union.check_unions::NoCopy"* noalias sret({{[^)]+}}) align 4 %
+// CHECK: [[DEFINE]] void @{{.*}}pass_nocopy{{.*}}(%"union.check_unions::NoCopy"* noundef %
+} // namespace check_unions
+
+//************ Passing `this` pointers
+// `this` pointer must always be defined
+
+namespace check_this {
+struct Object {
+ int data[];
+
+ Object() {
+ this->data[0] = 0;
+ }
+ int getData() {
+ return this->data[0];
+ }
+ Object *getThis() {
+ return this;
+ }
+};
+
+void use_object() {
+ Object obj;
+ obj.getData();
+ obj.getThis();
+}
+// CHECK: define linkonce_odr void @{{.*}}Object{{.*}}(%"struct.check_this::Object"* noundef %
+// CHECK: define linkonce_odr noundef i32 @{{.*}}Object{{.*}}getData{{.*}}(%"struct.check_this::Object"* noundef %
+// CHECK: define linkonce_odr noundef %"struct.check_this::Object"* @{{.*}}Object{{.*}}getThis{{.*}}(%"struct.check_this::Object"* noundef %
+} // namespace check_this
+
+//************ Passing vector types
+
+namespace check_vecs {
+typedef int __attribute__((vector_size(12))) i32x3;
+i32x3 ret_vec() {
+ return {};
+}
+void pass_vec(i32x3 v) {
+}
+
+// CHECK: [[DEFINE]] noundef <3 x i32> @{{.*}}ret_vec{{.*}}()
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_vec{{.*}}(<3 x i32> noundef %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_vec{{.*}}(<4 x i32> %
+} // namespace check_vecs
+
+//************ Passing exotic types
+// Function/Array pointers, Function member / Data member pointers, nullptr_t, ExtInt types
+
+namespace check_exotic {
+struct Object {
+ int mfunc();
+ int mdata;
+};
+typedef int Object::*mdptr;
+typedef int (Object::*mfptr)();
+typedef decltype(nullptr) nullptr_t;
+typedef int (*arrptr)[32];
+typedef int (*fnptr)(int);
+
+arrptr ret_arrptr() {
+ return nullptr;
+}
+fnptr ret_fnptr() {
+ return nullptr;
+}
+mdptr ret_mdptr() {
+ return nullptr;
+}
+mfptr ret_mfptr() {
+ return nullptr;
+}
+nullptr_t ret_npt() {
+ return nullptr;
+}
+void pass_npt(nullptr_t t) {
+}
+_ExtInt(3) ret_extint() {
+ return 0;
+}
+void pass_extint(_ExtInt(3) e) {
+}
+void pass_large_extint(_ExtInt(127) e) {
+}
+
+// Pointers to arrays/functions are always noundef
+// CHECK: [[DEFINE]] noundef [32 x i32]* @{{.*}}ret_arrptr{{.*}}()
+// CHECK: [[DEFINE]] noundef i32 (i32)* @{{.*}}ret_fnptr{{.*}}()
+
+// Pointers to members are never noundef
+// CHECK: [[DEFINE]] i64 @{{.*}}ret_mdptr{{.*}}()
+// CHECK-INTEL: [[DEFINE]] { i64, i64 } @{{.*}}ret_mfptr{{.*}}()
+// CHECK-AARCH: [[DEFINE]] [2 x i64] @{{.*}}ret_mfptr{{.*}}()
+
+// nullptr_t is never noundef
+// CHECK: [[DEFINE]] i8* @{{.*}}ret_npt{{.*}}()
+// CHECK: [[DEFINE]] void @{{.*}}pass_npt{{.*}}(i8* %
+
+// TODO: for now, ExtInt is only noundef if it is sign/zero-extended
+// CHECK-INTEL: [[DEFINE]] noundef signext i3 @{{.*}}ret_extint{{.*}}()
+// CHECK-AARCH: [[DEFINE]] i3 @{{.*}}ret_extint{{.*}}()
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_extint{{.*}}(i3 noundef signext %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_extint{{.*}}(i3 %
+// CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_large_extint{{.*}}(i64 %{{.*}}, i64 %
+// CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_large_extint{{.*}}(i127 %
+} // namespace check_exotic
diff --git a/clang/test/CodeGen/indirect-noundef.cpp b/clang/test/CodeGen/indirect-noundef.cpp
new file mode 100644
index 000000000000..6eccf8c6fb3a
--- /dev/null
+++ b/clang/test/CodeGen/indirect-noundef.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang -cc1 -x c++ -triple x86_64-unknown-unknown -O0 -emit-llvm -enable-noundef-analysis -o - %s | FileCheck %s
+
+union u1 {
+ int val;
+};
+
+// CHECK: @indirect_callee_int_ptr = [[GLOBAL:(dso_local )?global]] i32 (i32)*
+int (*indirect_callee_int_ptr)(int);
+// CHECK: @indirect_callee_union_ptr = [[GLOBAL]] i32 (i32)*
+union u1 (*indirect_callee_union_ptr)(union u1);
+
+// CHECK: [[DEFINE:define( dso_local)?]] noundef i32 @{{.*}}indirect_callee_int{{.*}}(i32 noundef %
+int indirect_callee_int(int a) { return a; }
+// CHECK: [[DEFINE]] i32 @{{.*}}indirect_callee_union{{.*}}(i32 %
+union u1 indirect_callee_union(union u1 a) {
+ return a;
+}
+
+int main() {
+ // CHECK: call noundef i32 @{{.*}}indirect_callee_int{{.*}}(i32 noundef 0)
+ indirect_callee_int(0);
+ // CHECK: call i32 @{{.*}}indirect_callee_union{{.*}}(i32 %
+ indirect_callee_union((union u1){0});
+
+ indirect_callee_int_ptr = indirect_callee_int;
+ indirect_callee_union_ptr = indirect_callee_union;
+
+ // CHECK: call noundef i32 %{{.*}}(i32 noundef 0)
+ indirect_callee_int_ptr(0);
+ // CHECK: call i32 %{{.*}}(i32 %
+ indirect_callee_union_ptr((union u1){});
+
+ return 0;
+}
More information about the cfe-commits
mailing list