[clang] [llvm] [X86] Extend kCFI with a 3-bit arity indicator (PR #121070)

Scott Constable via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 24 13:37:04 PST 2024


https://github.com/scottconstable created https://github.com/llvm/llvm-project/pull/121070

Kernel Control Flow Integrity (kCFI) is a feature that hardens indirect calls by comparing a 32-bit hash of the function pointer's type against a hash of the target function's type. If the hashes do not match, the kernel may panic (or log the hash check failure, depending on the kernel's configuration). These hashes are computed at compile time by applying the xxHash64 algorithm to each mangled canonical function (or function pointer) type, then truncating the result to 32 bits. This hash is written into each indirect-callable function header by encoding it as the 32-bit immediate operand to a `MOVri` instruction, e.g.:
```
__cfi_foo:
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	movl	$199571451, %eax                # hash of foo's type = 0xBE537FB
foo:
        ...
```

This PR extends x86-based kCFI with a 3-bit arity indicator encoded in the `MOVri` instruction's register (reg) field as follows:

| Arity Indicator | Description | Encoding in reg field |
| --------------- | --------------- | --------------- |
| 0 | 0 parameters | EAX |
| 1 | 1 parameter in RDI | ECX |
| 2 | 2 parameters in RDI and RSI | EDX |
| 3 | 3 parameters in RDI, RSI, and RDX | EBX |
| 4 | 4 parameters in RDI, RSI, RDX, and RCX | ESP |
| 5 | 5 parameters in RDI, RSI, RDX, RCX, and R8 | EBP |
| 6 | 6 parameters in RDI, RSI, RDX, RCX, R8, and R9 | ESI |
| 7 | At least one parameter may be passed on the stack | EDI |

For example, if `foo` takes 3 register arguments and no stack arguments, then the `MOVri` instruction in its kCFI header would instead be written as:
```
	movl	$199571451, %ebx                # hash of foo's type = 0xBE537FB
```

This PR will benefit other CFI approaches that build on kCFI, such as FineIBT. For example, a proposed enhancement to FineIBT (https://lkml.org/lkml/2024/9/27/982) must be able to infer, at kernel init time, which registers are live at an indirect call target. If the arity bits are available in the kCFI function header, then this information is trivial to infer.

Note that there is another existing PR proposal that includes the 3-bit arity within the existing 32-bit immediate field, which introduces different security properties: https://github.com/llvm/llvm-project/pull/117121.

From 81ff927ed34e9e8b61a432822f946544b411b3b4 Mon Sep 17 00:00:00 2001
From: Scott D Constable <scott.d.constable at intel.com>
Date: Mon, 23 Dec 2024 13:48:48 -0800
Subject: [PATCH] Implement a new kcfi_x86_arity feature that encodes an
 indirect call target's arity (i.e., the number of live-in registers) in the
 function's __cfi header.

---
 clang/include/clang/Basic/Features.def        |  1 +
 llvm/lib/Target/X86/X86AsmPrinter.cpp         | 20 +++++-
 .../X86/kcfi-patchable-function-prefix.ll     |  4 +-
 llvm/test/CodeGen/X86/kcfi.ll                 | 63 ++++++++++++++++++-
 4 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index c82b6d9b5f6c10..dca8f4dc0fbf76 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -254,6 +254,7 @@ FEATURE(is_trivially_constructible, LangOpts.CPlusPlus)
 FEATURE(is_trivially_copyable, LangOpts.CPlusPlus)
 FEATURE(is_union, LangOpts.CPlusPlus)
 FEATURE(kcfi, LangOpts.Sanitize.has(SanitizerKind::KCFI))
+FEATURE(kcfi_x86_arity, LangOpts.Sanitize.has(SanitizerKind::KCFI))
 FEATURE(modules, LangOpts.Modules)
 FEATURE(safe_stack, LangOpts.Sanitize.has(SanitizerKind::SafeStack))
 FEATURE(shadow_call_stack,
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index f01e47b41cf5e4..cb21d9bb5f2879 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -181,8 +181,26 @@ void X86AsmPrinter::emitKCFITypeId(const MachineFunction &MF) {
   // Embed the type hash in the X86::MOV32ri instruction to avoid special
   // casing object file parsers.
   EmitKCFITypePadding(MF);
+
+  Register MovReg = X86::EAX;
+  const auto &Triple = MF.getTarget().getTargetTriple();
+  if (Triple.isArch64Bit() && Triple.isOSLinux()) {
+    // Determine the function's arity (i.e., the number of arguments) at the ABI
+    // level by counting the number of parameters that are passed
+    // in registers, such as pointers and 64-bit (or smaller) integers. The
+    // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
+    // Additional parameters or parameters larger than 64 bits may be passed on
+    // the stack, in which case the arity is denoted as 7.
+    const unsigned ArityToRegMap[8] = {X86::EAX, X86::ECX, X86::EDX, X86::EBX,
+                                      X86::ESP, X86::EBP, X86::ESI, X86::EDI};
+    int Arity = MF.getInfo<X86MachineFunctionInfo>()->getArgumentStackSize() > 0
+                    ? 7
+                    : MF.getRegInfo().liveins().size();
+    MovReg = ArityToRegMap[Arity];
+  }
+
   EmitAndCountInstruction(MCInstBuilder(X86::MOV32ri)
-                              .addReg(X86::EAX)
+                              .addReg(MovReg)
                               .addImm(MaskKCFIType(Type->getZExtValue())));
 
   if (MAI->hasDotTypeDotSizeDirective()) {
diff --git a/llvm/test/CodeGen/X86/kcfi-patchable-function-prefix.ll b/llvm/test/CodeGen/X86/kcfi-patchable-function-prefix.ll
index 1b7bd7835e890c..deababc7fd5379 100644
--- a/llvm/test/CodeGen/X86/kcfi-patchable-function-prefix.ll
+++ b/llvm/test/CodeGen/X86/kcfi-patchable-function-prefix.ll
@@ -3,7 +3,7 @@
 ; CHECK:          .p2align 4
 ; CHECK-LABEL:    __cfi_f1:
 ; CHECK-COUNT-11:   nop
-; CHECK-NEXT:       movl $12345678, %eax
+; CHECK-NEXT:       movl $12345678, %ecx
 ; CHECK-LABEL:    .Lcfi_func_end0:
 ; CHECK-NEXT:     .size   __cfi_f1, .Lcfi_func_end0-__cfi_f1
 ; CHECK-LABEL:    f1:
@@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) {
 ; CHECK:          .p2align 4
 ; CHECK-LABEL:    __cfi_f3:
 ; CHECK-NOT:        nop
-; CHECK-NEXT:       movl $12345678, %eax
+; CHECK-NEXT:       movl $12345678, %ecx
 ; CHECK-COUNT-11:   nop
 ; CHECK-LABEL:    f3:
 define void @f3(ptr noundef %x) #0 !kcfi_type !1 {
diff --git a/llvm/test/CodeGen/X86/kcfi.ll b/llvm/test/CodeGen/X86/kcfi.ll
index 059efcc71b0eb8..91d706796b8e99 100644
--- a/llvm/test/CodeGen/X86/kcfi.ll
+++ b/llvm/test/CodeGen/X86/kcfi.ll
@@ -16,7 +16,7 @@
 ; ASM-NEXT:    nop
 ; ASM-NEXT:    nop
 ; ASM-NEXT:    nop
-; ASM-NEXT:    movl $12345678, %eax
+; ASM-NEXT:    movl $12345678, %ecx
 ; ASM-LABEL: .Lcfi_func_end0:
 ; ASM-NEXT:  .size   __cfi_f1, .Lcfi_func_end0-__cfi_f1
 define void @f1(ptr noundef %x) !kcfi_type !1 {
@@ -90,7 +90,7 @@ define void @f4(ptr noundef %x) #0 {
 
 ;; Ensure we emit Value + 1 for unwanted values (e.g. endbr64 == 4196274163).
 ; ASM-LABEL: __cfi_f5:
-; ASM: movl $4196274164, %eax # imm = 0xFA1E0FF4
+; ASM: movl $4196274164, %ecx # imm = 0xFA1E0FF4
 define void @f5(ptr noundef %x) !kcfi_type !2 {
 ; ASM-LABEL: f5:
 ; ASM: movl $98693132, %r10d # imm = 0x5E1F00C
@@ -100,7 +100,7 @@ define void @f5(ptr noundef %x) !kcfi_type !2 {
 
 ;; Ensure we emit Value + 1 for unwanted values (e.g. -endbr64 == 98693133).
 ; ASM-LABEL: __cfi_f6:
-; ASM: movl $98693134, %eax # imm = 0x5E1F00E
+; ASM: movl $98693134, %ecx # imm = 0x5E1F00E
 define void @f6(ptr noundef %x) !kcfi_type !3 {
 ; ASM-LABEL: f6:
 ; ASM: movl $4196274162, %r10d # imm = 0xFA1E0FF2
@@ -138,6 +138,60 @@ define void @f8() {
   ret void
 }
 
+%struct.S9 = type { [10 x i64] }
+
+;; Ensure that functions with large (e.g., greater than 8 bytes) arguments passed on the stack are assigned arity=7
+; ASM-LABEL: __cfi_f9:
+; ASM: movl	$199571451, %edi                # imm = 0xBE537FB
+define dso_local void @f9(ptr noundef byval(%struct.S9) align 8 %s) !kcfi_type !4  {
+entry:
+  ret void
+}
+
+;; Ensure that functions with fewer than 7 register arguments and no stack arguments are assigned arity<7
+; ASM-LABEL: __cfi_f10:
+; ASM: movl	$1046421190, %esi               # imm = 0x3E5F1EC6
+define dso_local void @f10(i32 noundef %v1, i32 noundef %v2, i32 noundef %v3, i32 noundef %v4, i32 noundef %v5, i32 noundef %v6) #0 !kcfi_type !5 {
+entry:
+  %v1.addr = alloca i32, align 4
+  %v2.addr = alloca i32, align 4
+  %v3.addr = alloca i32, align 4
+  %v4.addr = alloca i32, align 4
+  %v5.addr = alloca i32, align 4
+  %v6.addr = alloca i32, align 4
+  store i32 %v1, ptr %v1.addr, align 4
+  store i32 %v2, ptr %v2.addr, align 4
+  store i32 %v3, ptr %v3.addr, align 4
+  store i32 %v4, ptr %v4.addr, align 4
+  store i32 %v5, ptr %v5.addr, align 4
+  store i32 %v6, ptr %v6.addr, align 4
+  ret void
+}
+
+;; Ensure that functions with more than 6 integer arguments (so that at least one is passed on the stack) are assigned arity=7
+; ASM-LABEL: __cfi_f11:
+; ASM: movl	$1342488295, %edi               # imm = 0x5004BEE7
+define dso_local void @f11(i32 noundef %v1, i32 noundef %v2, i32 noundef %v3, i32 noundef %v4, i32 noundef %v5, i32 noundef %v6, i32 noundef %v7, i32 noundef %v8) #0 !kcfi_type !6 {
+entry:
+  %v1.addr = alloca i32, align 4
+  %v2.addr = alloca i32, align 4
+  %v3.addr = alloca i32, align 4
+  %v4.addr = alloca i32, align 4
+  %v5.addr = alloca i32, align 4
+  %v6.addr = alloca i32, align 4
+  %v7.addr = alloca i32, align 4
+  %v8.addr = alloca i32, align 4
+  store i32 %v1, ptr %v1.addr, align 4
+  store i32 %v2, ptr %v2.addr, align 4
+  store i32 %v3, ptr %v3.addr, align 4
+  store i32 %v4, ptr %v4.addr, align 4
+  store i32 %v5, ptr %v5.addr, align 4
+  store i32 %v6, ptr %v6.addr, align 4
+  store i32 %v7, ptr %v7.addr, align 4
+  store i32 %v8, ptr %v8.addr, align 4
+  ret void
+}
+
 attributes #0 = { "target-features"="+retpoline-indirect-branches,+retpoline-indirect-calls" }
 
 !llvm.module.flags = !{!0}
@@ -145,3 +199,6 @@ attributes #0 = { "target-features"="+retpoline-indirect-branches,+retpoline-ind
 !1 = !{i32 12345678}
 !2 = !{i32 4196274163}
 !3 = !{i32 98693133}
+!4 = !{i32 199571451}
+!5 = !{i32 1046421190}
+!6 = !{i32 1342488295}



More information about the llvm-commits mailing list