[clang] [llvm] [X86] Enhance kCFI type IDs with a 3-bit arity indicator. (PR #117121)
Scott Constable via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 23 12:54:21 PST 2024
https://github.com/scottconstable updated https://github.com/llvm/llvm-project/pull/117121
>From b787d6d7a9c0904c5e47e32556103b8a5220a7d1 Mon Sep 17 00:00:00 2001
From: Scott D Constable <scott.d.constable at intel.com>
Date: Tue, 19 Nov 2024 15:51:05 -0800
Subject: [PATCH] Enhance KCFI type IDs with a 3-bit arity indicator.
---
clang/lib/CodeGen/CodeGenModule.cpp | 31 +++++++++++++++++--
clang/test/CodeGen/kcfi-normalize.c | 18 +++++++----
clang/test/CodeGen/kcfi.c | 22 +++++++++++--
llvm/lib/Transforms/Utils/ModuleUtils.cpp | 31 +++++++++++++++++--
.../GCOVProfiling/kcfi-normalize.ll | 7 +++--
llvm/test/Transforms/GCOVProfiling/kcfi.ll | 7 +++--
6 files changed, 95 insertions(+), 21 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index b854eeb62a80ce..f9c01edf4f0953 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2183,7 +2183,8 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
}
llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
- if (auto *FnType = T->getAs<FunctionProtoType>())
+ auto *FnType = T->getAs<FunctionProtoType>();
+ if (FnType)
T = getContext().getFunctionType(
FnType->getReturnType(), FnType->getParamTypes(),
FnType->getExtProtoInfo().withExceptionSpec(EST_None));
@@ -2196,8 +2197,32 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
Out << ".normalized";
- return llvm::ConstantInt::get(Int32Ty,
- static_cast<uint32_t>(llvm::xxHash64(OutName)));
+ uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(OutName));
+ const auto &Triple = getTarget().getTriple();
+ if (FnType && Triple.isX86() && Triple.isArch64Bit() && Triple.isOSLinux()) {
+ // Estimate the function's arity (i.e., the number of arguments) at the ABI
+ // level by counting the number of parameters that are likely to be passed
+ // in registers, such as pointers and 64-bit (or smaller) integers. The
+ // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
+ // Additional parameters or parameters larger than 64 bits may be passed on
+ // the stack, in which case the arity is denoted as 7.
+ bool MayHaveStackArgs = FnType->getNumParams() > 6;
+
+ for (unsigned int i = 0; !MayHaveStackArgs && i < FnType->getNumParams();
+ ++i) {
+ const Type *PT = FnType->getParamType(i).getTypePtr();
+ if (!(PT->isPointerType() || (PT->isIntegralOrEnumerationType() &&
+ getContext().getTypeSize(PT) <= 64)))
+ MayHaveStackArgs = true;
+ }
+
+ // The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
+ // to form an enhanced KCFI type ID. This can prevent, for example, a
+ // 3-arity function's ID from ever colliding with a 2-arity function's ID.
+ OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : FnType->getNumParams());
+ }
+
+ return llvm::ConstantInt::get(Int32Ty, OutHash);
}
void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c
index b9150e88f6ab5f..8b7445fc85e490 100644
--- a/clang/test/CodeGen/kcfi-normalize.c
+++ b/clang/test/CodeGen/kcfi-normalize.c
@@ -10,25 +10,31 @@
void foo(void (*fn)(int), int arg) {
// CHECK-LABEL: define{{.*}}foo
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE1:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 1162514891) ]
+ // KCFI ID = 0x2A548E59
+ // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 710184537) ]
fn(arg);
}
void bar(void (*fn)(int, int), int arg1, int arg2) {
// CHECK-LABEL: define{{.*}}bar
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE2:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 448046469) ]
+ // KCFI ID = 0xD5A52C2A
+ // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 -710595542) ]
fn(arg1, arg2);
}
void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) {
// CHECK-LABEL: define{{.*}}baz
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE3:[0-9]+]]
- // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -2049681433) ]
+ // KCFI ID = 0x2EA2BF3B
+ // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 782417723) ]
fn(arg1, arg2, arg3);
}
// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1}
-// CHECK: ![[TYPE1]] = !{i32 -1143117868}
-// CHECK: ![[TYPE2]] = !{i32 -460921415}
-// CHECK: ![[TYPE3]] = !{i32 -333839615}
+// KCFI ID = 0xDEEB3EA2
+// CHECK: ![[TYPE1]] = !{i32 -555008350}
+// KCFI ID = 0x24372DCB
+// CHECK: ![[TYPE2]] = !{i32 607595979}
+// KCFI ID = 0x60D0180C
+// CHECK: ![[TYPE3]] = !{i32 1624250380}
diff --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c
index 622843cedba50f..dc9e818a9f8cca 100644
--- a/clang/test/CodeGen/kcfi.c
+++ b/clang/test/CodeGen/kcfi.c
@@ -7,7 +7,6 @@
/// Must emit __kcfi_typeid symbols for address-taken function declarations
// CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
-// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]"
/// Must not __kcfi_typeid symbols for non-address-taken declarations
// CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}"
@@ -29,7 +28,7 @@ int __call(fn_t f) __attribute__((__no_sanitize__("kcfi"))) {
// CHECK: define dso_local{{.*}} i32 @{{call|_Z4callPFivE}}(ptr{{.*}} %f){{.*}}
int call(fn_t f) {
- // CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#HASH]]) ]
+ // CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#%d,HASH:]]) ]
return f();
}
@@ -48,6 +47,20 @@ static int f5(void) { return 2; }
// CHECK-DAG: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @{{f6|_Z2f6v}}()
extern int f6(void);
+typedef struct {
+ int *p1;
+ int *p2[16];
+} s_t;
+
+// CHECK: define internal{{.*}} i32 @{{f7|_ZL2f7PFi3s_tEPS_}}(ptr{{.*}} %f, ptr{{.*}} %s){{.*}}
+static int f7(int (*f)(s_t), s_t *s) {
+ // CHECK: call{{.*}} i32 %{{.*}} [ "kcfi"(i32 [[#%d,HASH4:]]) ]
+ return f(*s) + 1;
+}
+
+// CHECK: define internal{{.*}} i32 @{{f8|_ZL2f83s_t}}(ptr{{.*}} %s){{.*}} !kcfi_type ![[#%d,TYPE4:]]
+static int f8(s_t s) { return 0; }
+
#ifndef __cplusplus
// C: define internal ptr @resolver1() #[[#]] !kcfi_type ![[#]] {
int ifunc1(int) __attribute__((ifunc("resolver1")));
@@ -59,12 +72,14 @@ long ifunc2(long) __attribute__((ifunc("resolver2")));
#endif
int test(void) {
+ s_t s;
return call(f1) +
__call((fn_t)f2) +
call(f3) +
call(f4) +
f5() +
- f6();
+ f6() +
+ f7(f8, &s);
}
#ifdef __cplusplus
@@ -85,3 +100,4 @@ void test_member_call(void) {
// CHECK-DAG: ![[#TYPE]] = !{i32 [[#HASH]]}
// CHECK-DAG: ![[#TYPE2]] = !{i32 [[#%d,HASH2:]]}
// MEMBER-DAG: ![[#TYPE3]] = !{i32 [[#HASH3]]}
+// CHECK-DAG: ![[#TYPE4]] = !{i32 [[#HASH4]]}
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7249571f344938..a5a70f7162f3c7 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -21,6 +21,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -208,10 +209,34 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
std::string Type = MangledType.str();
if (M.getModuleFlag("cfi-normalize-integers"))
Type += ".normalized";
+
+ uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(Type));
+ Triple T(M.getTargetTriple());
+ if (T.isX86() && T.isArch64Bit() && T.isOSLinux()) {
+ // Estimate the function's arity (i.e., the number of arguments) at the ABI
+ // level by counting the number of parameters that are likely to be passed
+ // in registers, such as pointers and 64-bit (or smaller) integers. The
+ // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
+ // Additional parameters or parameters larger than 64 bits may be passed on
+ // the stack, in which case the arity is denoted as 7.
+ size_t NumParams = F.arg_size();
+ bool MayHaveStackArgs = NumParams > 6;
+
+ for (unsigned int i = 0; !MayHaveStackArgs && i < NumParams; ++i) {
+ const llvm::Type *PT = F.getArg(i)->getType();
+ if (!(PT->isPointerTy() || PT->getIntegerBitWidth() <= 64))
+ MayHaveStackArgs = true;
+ }
+
+ // The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
+ // to form an enhanced KCFI type ID. This can prevent, for example, a
+ // 3-arity function's ID from ever colliding with a 2-arity function's ID.
+ OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : NumParams);
+ }
+
F.setMetadata(LLVMContext::MD_kcfi_type,
MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
- Type::getInt32Ty(Ctx),
- static_cast<uint32_t>(xxHash64(Type))))));
+ Type::getInt32Ty(Ctx), OutHash))));
// If the module was compiled with -fpatchable-function-entry, ensure
// we use the same patchable-function-prefix.
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
index 9ad0418025e56c..dacecf82da6aae 100644
--- a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
@@ -3,10 +3,10 @@
; RUN: mkdir -p %t && cd %t
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=x86_64-unknown-linux-gnu | FileCheck \
-; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT %s
+; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=powerpc64-ibm-aix | FileCheck \
-; RUN: --check-prefixes=CHECK,CHECK-RT-INIT %s
+; RUN: --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s
; Check for gcov initialization function pointers when we initialize
; the writeout and reset functions in the runtime.
@@ -39,4 +39,5 @@ entry:
; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]
-; CHECK: ![[#TYPE]] = !{i32 -440107680}
+; CHECK-PPC: ![[#TYPE]] = !{i32 -440107680}
+; CHECK-X86: ![[#TYPE]] = !{i32 774105856}
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi.ll b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
index 5e0e91fc92f5f7..c0e5722e08d743 100644
--- a/llvm/test/Transforms/GCOVProfiling/kcfi.ll
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
@@ -2,10 +2,10 @@
; RUN: mkdir -p %t && cd %t
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=x86_64-unknown-linux-gnu | FileCheck \
-; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT %s
+; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=powerpc64-ibm-aix | FileCheck \
-; RUN: --check-prefixes=CHECK,CHECK-RT-INIT %s
+; RUN: --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s
; Check for gcov initialization function pointers when we initialize
; the writeout and reset functions in the runtime.
@@ -37,4 +37,5 @@ entry:
; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]
-; CHECK: ![[#TYPE]] = !{i32 -1522505972}
+; CHECK-PPC: ![[#TYPE]] = !{i32 -1522505972}
+; CHECK-X86: ![[#TYPE]] = !{i32 704854112}
More information about the llvm-commits
mailing list