[clang] ec2e26e - [Clang] Add __builtin_function_start

Sami Tolvanen via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 20 13:11:18 PST 2021


Author: Sami Tolvanen
Date: 2021-12-20T12:55:33-08:00
New Revision: ec2e26eaf63558934f5b73a6e530edc453cf9508

URL: https://github.com/llvm/llvm-project/commit/ec2e26eaf63558934f5b73a6e530edc453cf9508
DIFF: https://github.com/llvm/llvm-project/commit/ec2e26eaf63558934f5b73a6e530edc453cf9508.diff

LOG: [Clang] Add __builtin_function_start

Control-Flow Integrity (CFI) replaces references to address-taken
functions with pointers to the CFI jump table. This is a problem
for low-level code, such as operating system kernels, which may
need the address of an actual function body without the jump table
indirection.

This change adds the __builtin_function_start() builtin, which
accepts an argument that can be constant-evaluated to a function,
and returns the address of the function body.

Link: https://github.com/ClangBuiltLinux/linux/issues/1353

Depends on D108478

Reviewed By: pcc, rjmccall

Differential Revision: https://reviews.llvm.org/D108479

Added: 
    clang/test/CodeGen/builtin-function-start.cpp

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/include/clang/AST/Expr.h
    clang/include/clang/Basic/Builtins.def
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/AST/Expr.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CGExprConstant.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/CodeGen/CodeGenModule.h
    clang/lib/Sema/SemaChecking.cpp
    clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
    clang/test/SemaCXX/builtins.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 24e85d336ef13..a47625e4a8dd2 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2517,6 +2517,48 @@ object that overloads ``operator&``.
     return __builtin_addressof(value);
   }
 
+``__builtin_function_start``
+-----------------------------
+
+``__builtin_function_start`` returns the address of a function body.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  void *__builtin_function_start(function)
+
+**Example of use**:
+
+.. code-block:: c++
+
+  void a() {}
+  void *p = __builtin_function_start(a);
+
+  class A {
+  public:
+    void a(int n);
+    void a();
+  };
+
+  void A::a(int n) {}
+  void A::a() {}
+
+  void *pa1 = __builtin_function_start((void(A::*)(int)) &A::a);
+  void *pa2 = __builtin_function_start((void(A::*)()) &A::a);
+
+**Description**:
+
+The ``__builtin_function_start`` builtin accepts an argument that can be
+constant-evaluated to a function, and returns the address of the function
+body.  This builtin is not supported on all targets.
+
+The returned pointer may differ from the normally taken function address
+and is not safe to call.  For example, with ``-fsanitize=cfi``, taking a
+function address produces a callable pointer to a CFI jump table, while
+``__builtin_function_start`` returns an address that fails
+:doc:`cfi-icall<ControlFlowIntegrity>` checks.
+
 ``__builtin_operator_new`` and ``__builtin_operator_delete``
 ------------------------------------------------------------
 

diff  --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 2c63406fba18d..e2c36e12393fa 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -572,6 +572,12 @@ class Expr : public ValueStmt {
   bool isConstantInitializer(ASTContext &Ctx, bool ForRef,
                              const Expr **Culprit = nullptr) const;
 
+  /// If this expression is an unambiguous reference to a single declaration,
+  /// in the style of __builtin_function_start, return that declaration.  Note
+  /// that this may return a non-static member function or field in C++ if this
+  /// expression is a member pointer constant.
+  const ValueDecl *getAsBuiltinConstantDeclRef(const ASTContext &Context) const;
+
   /// EvalStatus is a struct with detailed info about an evaluation in progress.
   struct EvalStatus {
     /// Whether the evaluated expression has side effects.

diff  --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 7ef550e5d81af..f57635e010dce 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1575,6 +1575,7 @@ BUILTIN(__builtin_smulll_overflow, "bSLLiCSLLiCSLLi*", "n")
 
 // Clang builtins (not available in GCC).
 BUILTIN(__builtin_addressof, "v*v&", "nct")
+BUILTIN(__builtin_function_start, "v*v&", "nct")
 BUILTIN(__builtin_operator_new, "v*z", "tc")
 BUILTIN(__builtin_operator_delete, "vv*", "tn")
 BUILTIN(__builtin_char_memchr, "c*cC*iz", "n")

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c26dbbbf1f697..875e639cb2beb 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -838,6 +838,9 @@ def warn_fortify_scanf_overflow : Warning<
   "%2, but the corresponding specifier may require size %3">,
   InGroup<FortifySource>;
 
+def err_function_start_invalid_type: Error<
+  "argument must be a function">;
+
 /// main()
 // static main() is not an error in C, just in C++.
 def warn_static_main : Warning<"'main' should not be declared static">,

diff  --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index d3cb2ff3734cb..2530beb89d177 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -202,6 +202,23 @@ bool Expr::isKnownToHaveBooleanValue(bool Semantic) const {
   return false;
 }
 
+const ValueDecl *
+Expr::getAsBuiltinConstantDeclRef(const ASTContext &Context) const {
+  Expr::EvalResult Eval;
+
+  if (EvaluateAsConstantExpr(Eval, Context)) {
+    APValue &Value = Eval.Val;
+
+    if (Value.isMemberPointer())
+      return Value.getMemberPointerDecl();
+
+    if (Value.isLValue() && Value.getLValueOffset().isZero())
+      return Value.getLValueBase().dyn_cast<const ValueDecl *>();
+  }
+
+  return nullptr;
+}
+
 // Amusing macro metaprogramming hack: check whether a class provides
 // a more specific implementation of getExprLoc().
 //

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9fcba4e25cef6..469339e8cd624 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1954,11 +1954,12 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
   return true;
 }
 
-/// Should this call expression be treated as a string literal?
-static bool IsStringLiteralCall(const CallExpr *E) {
+/// Should this call expression be treated as a constant?
+static bool IsConstantCall(const CallExpr *E) {
   unsigned Builtin = E->getBuiltinCallee();
   return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
-          Builtin == Builtin::BI__builtin___NSStringMakeConstantString);
+          Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
+          Builtin == Builtin::BI__builtin_function_start);
 }
 
 static bool IsGlobalLValue(APValue::LValueBase B) {
@@ -2004,7 +2005,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
   case Expr::ObjCBoxedExprClass:
     return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer();
   case Expr::CallExprClass:
-    return IsStringLiteralCall(cast<CallExpr>(E));
+    return IsConstantCall(cast<CallExpr>(E));
   // For GCC compatibility, &&label has static storage duration.
   case Expr::AddrLabelExprClass:
     return true;
@@ -8967,7 +8968,7 @@ bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) {
 }
 
 bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) {
-  if (IsStringLiteralCall(E))
+  if (IsConstantCall(E))
     return Success(E);
 
   if (unsigned BuiltinOp = E->getBuiltinCallee())

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 740f464a1dd7e..2cbc8f77bd391 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4505,6 +4505,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_addressof:
     return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
+  case Builtin::BI__builtin_function_start:
+    return RValue::get(CGM.GetFunctionStart(
+        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
   case Builtin::BI__builtin_operator_new:
     return EmitBuiltinNewDeleteCall(
         E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);

diff  --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 47eed6ea2aa84..cf1f2e0eab92d 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1988,6 +1988,9 @@ ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) {
 ConstantLValue
 ConstantLValueEmitter::VisitCallExpr(const CallExpr *E) {
   unsigned builtin = E->getBuiltinCallee();
+  if (builtin == Builtin::BI__builtin_function_start)
+    return CGM.GetFunctionStart(
+        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext()));
   if (builtin != Builtin::BI__builtin___CFStringMakeConstantString &&
       builtin != Builtin::BI__builtin___NSStringMakeConstantString)
     return nullptr;

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 116abad57d67c..36b7ce87336c9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3886,6 +3886,14 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD,
   return F;
 }
 
+llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) {
+  llvm::GlobalValue *F =
+      cast<llvm::GlobalValue>(GetAddrOfFunction(Decl)->stripPointerCasts());
+
+  return llvm::ConstantExpr::getBitCast(llvm::NoCFIValue::get(F),
+                                        llvm::Type::getInt8PtrTy(VMContext));
+}
+
 static const FunctionDecl *
 GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
   TranslationUnitDecl *TUDecl = C.getTranslationUnitDecl();

diff  --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index e1c7f486d334e..f1565511f98ad 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -881,6 +881,9 @@ class CodeGenModule : public CodeGenTypeCache {
                                     ForDefinition_t IsForDefinition
                                       = NotForDefinition);
 
+  // Return the function body address of the given function.
+  llvm::Constant *GetFunctionStart(const ValueDecl *Decl);
+
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 956f7abce7372..fb99591656d34 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -195,6 +195,29 @@ static bool SemaBuiltinAddressof(Sema &S, CallExpr *TheCall) {
   return false;
 }
 
+/// Check that the argument to __builtin_function_start is a function.
+static bool SemaBuiltinFunctionStart(Sema &S, CallExpr *TheCall) {
+  if (checkArgCount(S, TheCall, 1))
+    return true;
+
+  ExprResult Arg = S.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (Arg.isInvalid())
+    return true;
+
+  TheCall->setArg(0, Arg.get());
+  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(
+      Arg.get()->getAsBuiltinConstantDeclRef(S.getASTContext()));
+
+  if (!FD) {
+    S.Diag(TheCall->getBeginLoc(), diag::err_function_start_invalid_type)
+        << TheCall->getSourceRange();
+    return true;
+  }
+
+  return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
+                                              TheCall->getBeginLoc());
+}
+
 /// Check the number of arguments and set the result type to
 /// the argument type.
 static bool SemaBuiltinPreserveAI(Sema &S, CallExpr *TheCall) {
@@ -1918,6 +1941,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (SemaBuiltinAddressof(*this, TheCall))
       return ExprError();
     break;
+  case Builtin::BI__builtin_function_start:
+    if (SemaBuiltinFunctionStart(*this, TheCall))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_is_aligned:
   case Builtin::BI__builtin_align_up:
   case Builtin::BI__builtin_align_down:

diff  --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 13781b3364261..4a56156de4b27 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -66,7 +66,8 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
   case Builtin::BI__builtin_expect:
   case Builtin::BI__builtin_expect_with_probability:
   case Builtin::BI__builtin_assume_aligned:
-  case Builtin::BI__builtin_addressof: {
+  case Builtin::BI__builtin_addressof:
+  case Builtin::BI__builtin_function_start: {
     // For __builtin_unpredictable, __builtin_expect,
     // __builtin_expect_with_probability and __builtin_assume_aligned,
     // just return the value of the subexpression.

diff  --git a/clang/test/CodeGen/builtin-function-start.cpp b/clang/test/CodeGen/builtin-function-start.cpp
new file mode 100644
index 0000000000000..7290edde97b79
--- /dev/null
+++ b/clang/test/CodeGen/builtin-function-start.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=cfi-icall -o - %s | FileCheck %s
+
+#if !__has_builtin(__builtin_function_start)
+#error "missing __builtin_function_start"
+#endif
+
+void a(void) {}
+// CHECK: @e = global i8* bitcast (void ()* no_cfi @_Z1av to i8*)
+const void *e = __builtin_function_start(a);
+
+constexpr void (*d)() = &a;
+// CHECK: @f = global i8* bitcast (void ()* no_cfi @_Z1av to i8*)
+const void *f = __builtin_function_start(d);
+
+void b(void) {}
+// CHECK: @g = global [2 x i8*] [i8* bitcast (void ()* @_Z1bv to i8*), i8* bitcast (void ()* no_cfi @_Z1bv to i8*)]
+void *g[] = {(void *)b, __builtin_function_start(b)};
+
+void c(void *p) {}
+
+class A {
+public:
+  void f();
+  virtual void g();
+  static void h();
+  int i() const;
+  int i(int n) const;
+};
+
+void A::f() {}
+void A::g() {}
+void A::h() {}
+
+// CHECK: define {{.*}}i32 @_ZNK1A1iEv(%class.A* {{.*}}%this)
+int A::i() const { return 0; }
+
+// CHECK: define {{.*}}i32 @_ZNK1A1iEi(%class.A* {{.*}}%this, i32 %n)
+int A::i(int n) const { return 0; }
+
+void h(void) {
+  // CHECK: store i8* bitcast (void ()* no_cfi @_Z1bv to i8*), i8** %g
+  void *g = __builtin_function_start(b);
+  // CHECK: call void @_Z1cPv(i8* bitcast (void ()* no_cfi @_Z1av to i8*))
+  c(__builtin_function_start(a));
+
+  // CHECK: store i8* bitcast (void (%class.A*)* no_cfi @_ZN1A1fEv to i8*), i8** %Af
+  void *Af = __builtin_function_start(&A::f);
+  // CHECK: store i8* bitcast (void (%class.A*)* no_cfi @_ZN1A1gEv to i8*), i8** %Ag
+  void *Ag = __builtin_function_start(&A::g);
+  // CHECK: store i8* bitcast (void ()* no_cfi @_ZN1A1hEv to i8*), i8** %Ah
+  void *Ah = __builtin_function_start(&A::h);
+  // CHECK: store i8* bitcast (i32 (%class.A*)* no_cfi @_ZNK1A1iEv to i8*), i8** %Ai1
+  void *Ai1 = __builtin_function_start((int(A::*)() const) & A::i);
+  // CHECK: store i8* bitcast (i32 (%class.A*, i32)* no_cfi @_ZNK1A1iEi to i8*), i8** %Ai2
+  void *Ai2 = __builtin_function_start((int(A::*)(int) const) & A::i);
+}

diff  --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 8869b6bef04c8..02ffc879a1834 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -39,6 +39,13 @@ namespace addressof {
   S *ptmp = __builtin_addressof(S{}); // expected-error {{taking the address of a temporary}}
 }
 
+namespace function_start {
+void a(void) {}
+int n;
+void *p = __builtin_function_start(n);               // expected-error {{argument must be a function}}
+static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static_assert expression is not an integral constant expression}}
+} // namespace function_start
+
 void no_ms_builtins() {
   __assume(1); // expected-error {{use of undeclared}}
   __noop(1); // expected-error {{use of undeclared}}


        


More information about the cfe-commits mailing list