[clang] ad31a2d - Change -fsanitize=function to place two words before the function entry

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Fri May 19 07:50:35 PDT 2023


Author: Fangrui Song
Date: 2023-05-19T07:50:29-07:00
New Revision: ad31a2dcadfcd57a99bbd6d0050d2690fd84a883

URL: https://github.com/llvm/llvm-project/commit/ad31a2dcadfcd57a99bbd6d0050d2690fd84a883
DIFF: https://github.com/llvm/llvm-project/commit/ad31a2dcadfcd57a99bbd6d0050d2690fd84a883.diff

LOG: Change -fsanitize=function to place two words before the function entry

The current implementation of -fsanitize=function places two words (the prolog
signature and the RTTI proxy) at the function entry, which makes the feature
incompatible with Intel Indirect Branch Tracking (IBT) that needs an ENDBR instruction
at the function entry. To allow the combination, move the two words before the
function entry, similar to -fsanitize=kcfi.

Armv8.5 Branch Target Identification (BTI) has a similar requirement.

Note: for IBT and BTI, whether a function gets a marker instruction at the entry
generally cannot be assumed (it can be disabled by a function attribute or
stronger LTO optimizations).

It is extremely unlikely for two words preceding a function entry to be
inaccessible. One way to achieve this is by ensuring that a function is
aligned at a page boundary and making the preceding page unmapped or
unreadable. This is not reasonable for application or library code.
(Think: the first text section has crt* code not instrumented by
-fsanitize=function.)

We use 0xc105cafe for all targets. .long 0xc105cafe disassembles to invalid
instructions on all architectures I have tested, except Power where it is
`lfs 8, -13570(5)` (Load Floating-Point with a weird offset, unlikely to be used in real code).

---

For the removed function in AsmPrinter.cpp, remove an assert: `mdconst::extract`
already asserts non-nullness.

For compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp,
when the function doesn't have prolog/epilog (-O1 and above), after moving the two words,
the address of the function equals the address of ret instruction,
so symbolizing the function will additionally get a non-zero column number.
Adjust the test to allow an optional column number.
```
  .long   3238382334
  .long   .L__llvm_rtti_proxy-_Z1fv
_Z1fv:   // symbolizing here retrieves the line table entry from the second .loc
  .file   0 ...
  .loc    0 1 0
  .cfi_startproc
  .loc    0 2 1 prologue_end
  retq
```

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D148665

Added: 
    

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/TargetInfo.cpp
    clang/lib/CodeGen/TargetInfo.h
    clang/test/CodeGen/ubsan-function.cpp
    clang/test/CodeGenCXX/catch-undef-behavior.cpp
    clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
    compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
    llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/test/CodeGen/X86/func-sanitizer.ll
    llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 44b97d272634e..834c2fda5855b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5368,7 +5368,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
       llvm::Value *CalleePrefixStruct = Builder.CreateBitCast(
           CalleePtr, llvm::PointerType::getUnqual(PrefixStructTy));
       llvm::Value *CalleeSigPtr =
-          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 0);
+          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 0);
       llvm::Value *CalleeSig =
           Builder.CreateAlignedLoad(PrefixSigType, CalleeSigPtr, getIntAlign());
       llvm::Value *CalleeSigMatch = Builder.CreateICmpEQ(CalleeSig, PrefixSig);
@@ -5379,7 +5379,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
 
       EmitBlock(TypeCheck);
       llvm::Value *CalleeRTTIPtr =
-          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 1);
+          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 1);
       llvm::Value *CalleeRTTIEncoded =
           Builder.CreateAlignedLoad(Int32Ty, CalleeRTTIPtr, getPointerAlign());
       llvm::Value *CalleeRTTI =

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index e4d75eff08423..13c2a6b65f76e 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1293,15 +1293,6 @@ class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
                                 std::string &AsmString,
                                 unsigned NumOutputs) const override;
 
-  llvm::Constant *
-  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
-    unsigned Sig = (0xeb << 0) |  // jmp rel8
-                   (0x06 << 8) |  //           .+0x08
-                   ('v' << 16) |
-                   ('2' << 24);
-    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
-  }
-
   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
     return "movl\t%ebp, %ebp"
            "\t\t// marker for objc_retainAutoreleaseReturnValue";
@@ -2539,15 +2530,6 @@ class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
     return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
   }
 
-  llvm::Constant *
-  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
-    unsigned Sig = (0xeb << 0) | // jmp rel8
-                   (0x06 << 8) | //           .+0x08
-                   ('v' << 16) |
-                   ('2' << 24);
-    return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
-  }
-
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
     if (GV->isDeclaration())

diff  --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index b225a5c35adc5..7b5532eaf7b65 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -199,9 +199,10 @@ class TargetCodeGenInfo {
 
   /// Return a constant used by UBSan as a signature to identify functions
   /// possessing type information, or 0 if the platform is unsupported.
+  /// This magic number is invalid instruction encoding in many targets.
   virtual llvm::Constant *
   getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const {
-    return nullptr;
+    return llvm::ConstantInt::get(CGM.Int32Ty, 0xc105cafe);
   }
 
   /// Determine whether a call to an unprototyped functions under

diff  --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp
index 3f69b44b9f5d1..76fb88c923a2a 100644
--- a/clang/test/CodeGen/ubsan-function.cpp
+++ b/clang/test/CodeGen/ubsan-function.cpp
@@ -5,12 +5,12 @@
 void fun() {}
 
 // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(ptr noundef %f)
-// CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 0, i32 0, !nosanitize
+// CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 -1, i32 0, !nosanitize
 // CHECK: load i32, ptr {{.*}}, align {{.*}}, !nosanitize
-// CHECK: icmp eq i32 {{.*}}, 846595819, !nosanitize
+// CHECK: icmp eq i32 {{.*}}, -1056584962, !nosanitize
 // CHECK: br i1 {{.*}}, label %[[LABEL1:.*]], label %[[LABEL4:.*]], !nosanitize
 // CHECK: [[LABEL1]]:
-// CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 0, i32 1, !nosanitize
+// CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 -1, i32 1, !nosanitize
 // CHECK: load i32, ptr {{.*}}, align {{.*}}, !nosanitize
 // CHECK: icmp eq ptr {{.*}}, @_ZTIFvvE, !nosanitize
 // CHECK: br i1 {{.*}}, label %[[LABEL3:.*]], label %[[LABEL2:[^,]*]], {{.*}}!nosanitize
@@ -22,4 +22,4 @@ void fun() {}
 // CHECK: br label %[[LABEL4]], !nosanitize
 void caller(void (*f)()) { f(); }
 
-// CHECK: ![[FUNCSAN]] = !{i32 846595819, ptr @[[PROXY]]}
+// CHECK: ![[FUNCSAN]] = !{i32 -1056584962, ptr @[[PROXY]]}

diff  --git a/clang/test/CodeGenCXX/catch-undef-behavior.cpp b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
index 50f1e2f3b147e..a116b4e5e9b84 100644
--- a/clang/test/CodeGenCXX/catch-undef-behavior.cpp
+++ b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
@@ -402,13 +402,13 @@ void indirect_function_call(void (*p)(int)) {
   // CHECK: [[PTR:%.+]] = bitcast void (i32)* {{.*}} to <{ i32, i32 }>*
 
   // Signature check
-  // CHECK-NEXT: [[SIGPTR:%.+]] = getelementptr <{ i32, i32 }>, <{ i32, i32 }>* [[PTR]], i32 0, i32 0
+  // CHECK-NEXT: [[SIGPTR:%.+]] = getelementptr <{ i32, i32 }>, <{ i32, i32 }>* [[PTR]], i32 -1, i32 0
   // CHECK-NEXT: [[SIG:%.+]] = load i32, i32* [[SIGPTR]]
-  // CHECK-NEXT: [[SIGCMP:%.+]] = icmp eq i32 [[SIG]], 846595819
+  // CHECK-NEXT: [[SIGCMP:%.+]] = icmp eq i32 [[SIG]], -1056584962
   // CHECK-NEXT: br i1 [[SIGCMP]]
 
   // RTTI pointer check
-  // CHECK: [[RTTIPTR:%.+]] = getelementptr <{ i32, i32 }>, <{ i32, i32 }>* [[PTR]], i32 0, i32 1
+  // CHECK: [[RTTIPTR:%.+]] = getelementptr <{ i32, i32 }>, <{ i32, i32 }>* [[PTR]], i32 -1, i32 1
   // CHECK-NEXT: [[RTTIEncIntTrunc:%.+]] = load i32, i32* [[RTTIPTR]]
   // CHECK-NEXT: [[RTTIEncInt:%.+]] = sext i32 [[RTTIEncIntTrunc]] to i64
   // CHECK-NEXT: [[FuncAddrInt:%.+]] = ptrtoint void (i32)* {{.*}} to i64
@@ -750,4 +750,4 @@ void ThisAlign::this_align_lambda_2() {
 
 // CHECK: attributes [[NR_NUW]] = { noreturn nounwind }
 
-// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]}
+// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 -1056584962, i8** [[PROXY]]}

diff  --git a/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp b/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
index 7fbf4970e249f..10ca603a64c1a 100644
--- a/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
+++ b/clang/test/CodeGenCXX/ubsan-function-noexcept.cpp
@@ -14,4 +14,4 @@ void g(void (*p)() noexcept) {
   p();
 }
 
-// CHECK: ![[FUNCSAN]] = !{i32 846595819, ptr [[PROXY]]}
+// CHECK: ![[FUNCSAN]] = !{i32 -1056584962, ptr [[PROXY]]}

diff  --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
index 567db7a872cc1..f59aee66dc299 100644
--- a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
+++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
@@ -61,7 +61,7 @@ void make_valid_call() {
 
 void make_invalid_call() {
   // CHECK: function.cpp:[[@LINE+4]]:3: runtime error: call to function f() through pointer to incorrect function type 'void (*)(int)'
-  // CHECK-NEXT: function.cpp:[[@LINE-11]]: note: f() defined here
+  // CHECK-NEXT: function.cpp:[[@LINE-11]]:{{(11:)?}} note: f() defined here
   // NOSYM: function.cpp:[[@LINE+2]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)(int)'
   // NOSYM-NEXT: ({{.*}}+0x{{.*}}): note: (unknown) defined here
   reinterpret_cast<void (*)(int)>(reinterpret_cast<uintptr_t>(f))(42);

diff  --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 746b7e409cf75..5b05b1a069ef3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -971,6 +971,21 @@ void AsmPrinter::emitFunctionHeader() {
     CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
+  // Emit the function prologue data for the indirect call sanitizer.
+  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+    assert(MD->getNumOperands() == 2);
+
+    auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+    auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
+    emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+    const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+    const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+    const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+    // Use 32 bit since only small code model is supported.
+    OutStreamer->emitValue(PCRel, 4u);
+  }
+
   if (isVerbose()) {
     F.printAsOperand(OutStreamer->getCommentOS(),
                      /*PrintType=*/false, F.getParent());
@@ -1025,24 +1040,6 @@ void AsmPrinter::emitFunctionHeader() {
   // Emit the prologue data.
   if (F.hasPrologueData())
     emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
-  // Emit the function prologue data for the indirect call sanitizer.
-  if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
-    assert(TM.getTargetTriple().getArch() == Triple::x86 ||
-           TM.getTargetTriple().getArch() == Triple::x86_64);
-    assert(MD->getNumOperands() == 2);
-
-    auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
-    auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
-    assert(PrologueSig && FTRTTIProxy);
-    emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
-    const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
-    const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
-    const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
-    // Use 32 bit since only small code model is supported.
-    OutStreamer->emitValue(PCRel, 4u);
-  }
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the

diff  --git a/llvm/test/CodeGen/X86/func-sanitizer.ll b/llvm/test/CodeGen/X86/func-sanitizer.ll
index 9825685b7c307..859987fe47d31 100644
--- a/llvm/test/CodeGen/X86/func-sanitizer.ll
+++ b/llvm/test/CodeGen/X86/func-sanitizer.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-; CHECK: _Z3funv:
-; CHECK:         .cfi_startproc
-; CHECK:         .long   846595819
-; CHECK:         .long   .L__llvm_rtti_proxy-_Z3funv
-; CHECK: .L__llvm_rtti_proxy:
-; CHECK:         .quad   i
-; CHECK:         .size   .L__llvm_rtti_proxy, 8
+; CHECK:      .type _Z3funv, at function
+; CHECK-NEXT:   .long   3238382334  # 0xc105cafe
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-_Z3funv
+; CHECK-NEXT: _Z3funv:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; CHECK-NEXT:   retq
 
 @i = linkonce_odr constant i32 1
 @__llvm_rtti_proxy = private unnamed_addr constant ptr @i
@@ -15,4 +15,4 @@ define dso_local void @_Z3funv() !func_sanitize !0 {
   ret void
 }
 
-!0 = !{i32 846595819, ptr @__llvm_rtti_proxy}
+!0 = !{i32 3238382334, ptr @__llvm_rtti_proxy}

diff  --git a/llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll b/llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
index 9d1db9cd6b083..eaadeef4803a1 100644
--- a/llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
+++ b/llvm/test/CodeGen/X86/patchable-function-entry-ibt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=i686 %s -o - | FileCheck --check-prefixes=CHECK,32 %s
 ; RUN: llc -mtriple=x86_64 %s -o - | FileCheck --check-prefixes=CHECK,64 %s
 
+ at _ZTIFvvE = linkonce_odr constant i32 1
+ at __llvm_rtti_proxy = private unnamed_addr constant ptr @_ZTIFvvE
+
 ;; -fpatchable-function-entry=0 -fcf-protection=branch
 define void @f0() "patchable-function-entry"="0" {
 ; CHECK-LABEL: f0:
@@ -83,6 +86,25 @@ entry:
   ret void
 }
 
+;; Test the interaction with -fsanitize=function.
+; CHECK:      .type sanitize_function, at function
+; CHECK-NEXT: .Ltmp{{.*}}:
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   .long   3238382334
+; CHECK-NEXT:   .long   .L__llvm_rtti_proxy-sanitize_function
+; CHECK-NEXT: sanitize_function:
+; CHECK-NEXT: .Lfunc_begin{{.*}}:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:   # %bb.0:
+; 32-NEXT:      endbr32
+; 64-NEXT:      endbr64
+; CHECK-NEXT:   nop
+; CHECK-NEXT:   ret
+define void @sanitize_function(ptr noundef %x) "patchable-function-prefix"="1" "patchable-function-entry"="1" !func_sanitize !1 {
+  ret void
+}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 8, !"cf-protection-branch", i32 1}
+!1 = !{i32 3238382334, ptr @__llvm_rtti_proxy}


        


More information about the cfe-commits mailing list