[clang] [llvm] [X86] Enhance kCFI type IDs with a 3-bit arity indicator. (PR #117121)

Scott Constable via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 23 12:54:21 PST 2024


https://github.com/scottconstable updated https://github.com/llvm/llvm-project/pull/117121

>From b787d6d7a9c0904c5e47e32556103b8a5220a7d1 Mon Sep 17 00:00:00 2001
From: Scott D Constable <scott.d.constable at intel.com>
Date: Tue, 19 Nov 2024 15:51:05 -0800
Subject: [PATCH] Enhance KCFI type IDs with a 3-bit arity indicator.

---
 clang/lib/CodeGen/CodeGenModule.cpp           | 31 +++++++++++++++++--
 clang/test/CodeGen/kcfi-normalize.c           | 18 +++++++----
 clang/test/CodeGen/kcfi.c                     | 22 +++++++++++--
 llvm/lib/Transforms/Utils/ModuleUtils.cpp     | 31 +++++++++++++++++--
 .../GCOVProfiling/kcfi-normalize.ll           |  7 +++--
 llvm/test/Transforms/GCOVProfiling/kcfi.ll    |  7 +++--
 6 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index b854eeb62a80ce..f9c01edf4f0953 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2183,7 +2183,8 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
 }
 
 llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
-  if (auto *FnType = T->getAs<FunctionProtoType>())
+  auto *FnType = T->getAs<FunctionProtoType>();
+  if (FnType)
     T = getContext().getFunctionType(
         FnType->getReturnType(), FnType->getParamTypes(),
         FnType->getExtProtoInfo().withExceptionSpec(EST_None));
@@ -2196,8 +2197,32 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
   if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
     Out << ".normalized";
 
-  return llvm::ConstantInt::get(Int32Ty,
-                                static_cast<uint32_t>(llvm::xxHash64(OutName)));
+  uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(OutName));
+  const auto &Triple = getTarget().getTriple();
+  if (FnType && Triple.isX86() && Triple.isArch64Bit() && Triple.isOSLinux()) {
+    // Estimate the function's arity (i.e., the number of arguments) at the ABI
+    // level by counting the number of parameters that are likely to be passed
+    // in registers, such as pointers and 64-bit (or smaller) integers. The
+    // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
+    // Additional parameters or parameters larger than 64 bits may be passed on
+    // the stack, in which case the arity is denoted as 7.
+    bool MayHaveStackArgs = FnType->getNumParams() > 6;
+
+    for (unsigned int i = 0; !MayHaveStackArgs && i < FnType->getNumParams();
+         ++i) {
+      const Type *PT = FnType->getParamType(i).getTypePtr();
+      if (!(PT->isPointerType() || (PT->isIntegralOrEnumerationType() &&
+                                    getContext().getTypeSize(PT) <= 64)))
+        MayHaveStackArgs = true;
+    }
+
+    // The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
+    // to form an enhanced KCFI type ID. This can prevent, for example, a
+    // 3-arity function's ID from ever colliding with a 2-arity function's ID.
+    OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : FnType->getNumParams());
+  }
+
+  return llvm::ConstantInt::get(Int32Ty, OutHash);
 }
 
 void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c
index b9150e88f6ab5f..8b7445fc85e490 100644
--- a/clang/test/CodeGen/kcfi-normalize.c
+++ b/clang/test/CodeGen/kcfi-normalize.c
@@ -10,25 +10,31 @@
 void foo(void (*fn)(int), int arg) {
     // CHECK-LABEL: define{{.*}}foo
     // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE1:[0-9]+]]
-    // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 1162514891) ]
+    // KCFI ID = 0x2A548E59
+    // CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 710184537) ]
     fn(arg);
 }
 
 void bar(void (*fn)(int, int), int arg1, int arg2) {
     // CHECK-LABEL: define{{.*}}bar
     // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE2:[0-9]+]]
-    // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 448046469) ]
+    // KCFI ID = 0xD5A52C2A
+    // CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 -710595542) ]
     fn(arg1, arg2);
 }
 
 void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) {
     // CHECK-LABEL: define{{.*}}baz
     // CHECK-SAME: {{.*}}!kcfi_type ![[TYPE3:[0-9]+]]
-    // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -2049681433) ]
+    // KCFI ID = 0x2EA2BF3B
+    // CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 782417723) ]
     fn(arg1, arg2, arg3);
 }
 
 // CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1}
-// CHECK: ![[TYPE1]] = !{i32 -1143117868}
-// CHECK: ![[TYPE2]] = !{i32 -460921415}
-// CHECK: ![[TYPE3]] = !{i32 -333839615}
+// KCFI ID = 0xDEEB3EA2
+// CHECK: ![[TYPE1]] = !{i32 -555008350}
+// KCFI ID = 0x24372DCB
+// CHECK: ![[TYPE2]] = !{i32 607595979}
+// KCFI ID = 0x60D0180C
+// CHECK: ![[TYPE3]] = !{i32 1624250380}
diff --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c
index 622843cedba50f..dc9e818a9f8cca 100644
--- a/clang/test/CodeGen/kcfi.c
+++ b/clang/test/CodeGen/kcfi.c
@@ -7,7 +7,6 @@
 
 /// Must emit __kcfi_typeid symbols for address-taken function declarations
 // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
-// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]"
 /// Must not __kcfi_typeid symbols for non-address-taken declarations
 // CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}"
 
@@ -29,7 +28,7 @@ int __call(fn_t f) __attribute__((__no_sanitize__("kcfi"))) {
 
 // CHECK: define dso_local{{.*}} i32 @{{call|_Z4callPFivE}}(ptr{{.*}} %f){{.*}}
 int call(fn_t f) {
-  // CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#HASH]]) ]
+  // CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#%d,HASH:]]) ]
   return f();
 }
 
@@ -48,6 +47,20 @@ static int f5(void) { return 2; }
 // CHECK-DAG: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @{{f6|_Z2f6v}}()
 extern int f6(void);
 
+typedef struct {
+  int *p1;
+  int *p2[16];
+} s_t;
+
+// CHECK: define internal{{.*}} i32 @{{f7|_ZL2f7PFi3s_tEPS_}}(ptr{{.*}} %f, ptr{{.*}} %s){{.*}}
+static int f7(int (*f)(s_t), s_t *s) {
+  // CHECK: call{{.*}} i32 %{{.*}} [ "kcfi"(i32 [[#%d,HASH4:]]) ]
+  return f(*s) + 1;
+}
+
+// CHECK: define internal{{.*}} i32 @{{f8|_ZL2f83s_t}}(ptr{{.*}} %s){{.*}} !kcfi_type ![[#%d,TYPE4:]]
+static int f8(s_t s) { return 0; }
+
 #ifndef __cplusplus
 // C: define internal ptr @resolver1() #[[#]] !kcfi_type ![[#]] {
 int ifunc1(int) __attribute__((ifunc("resolver1")));
@@ -59,12 +72,14 @@ long ifunc2(long) __attribute__((ifunc("resolver2")));
 #endif
 
 int test(void) {
+  s_t s;
   return call(f1) +
          __call((fn_t)f2) +
          call(f3) +
          call(f4) +
          f5() +
-         f6();
+         f6() +
+         f7(f8, &s);
 }
 
 #ifdef __cplusplus
@@ -85,3 +100,4 @@ void test_member_call(void) {
 // CHECK-DAG: ![[#TYPE]] = !{i32 [[#HASH]]}
 // CHECK-DAG: ![[#TYPE2]] = !{i32 [[#%d,HASH2:]]}
 // MEMBER-DAG: ![[#TYPE3]] = !{i32 [[#HASH3]]}
+// CHECK-DAG: ![[#TYPE4]] = !{i32 [[#HASH4]]}
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7249571f344938..a5a70f7162f3c7 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,8 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -21,6 +21,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/xxhash.h"
+#include "llvm/TargetParser/Triple.h"
 
 using namespace llvm;
 
@@ -208,10 +209,34 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
   std::string Type = MangledType.str();
   if (M.getModuleFlag("cfi-normalize-integers"))
     Type += ".normalized";
+
+  uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(Type));
+  Triple T(M.getTargetTriple());
+  if (T.isX86() && T.isArch64Bit() && T.isOSLinux()) {
+    // Estimate the function's arity (i.e., the number of arguments) at the ABI
+    // level by counting the number of parameters that are likely to be passed
+    // in registers, such as pointers and 64-bit (or smaller) integers. The
+    // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
+    // Additional parameters or parameters larger than 64 bits may be passed on
+    // the stack, in which case the arity is denoted as 7.
+    size_t NumParams = F.arg_size();
+    bool MayHaveStackArgs = NumParams > 6;
+
+    for (unsigned int i = 0; !MayHaveStackArgs && i < NumParams; ++i) {
+      const llvm::Type *PT = F.getArg(i)->getType();
+      if (!(PT->isPointerTy() || PT->getIntegerBitWidth() <= 64))
+        MayHaveStackArgs = true;
+    }
+
+    // The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
+    // to form an enhanced KCFI type ID. This can prevent, for example, a
+    // 3-arity function's ID from ever colliding with a 2-arity function's ID.
+    OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : NumParams);
+  }
+
   F.setMetadata(LLVMContext::MD_kcfi_type,
                 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
-                                     Type::getInt32Ty(Ctx),
-                                     static_cast<uint32_t>(xxHash64(Type))))));
+                                     Type::getInt32Ty(Ctx), OutHash))));
   // If the module was compiled with -fpatchable-function-entry, ensure
   // we use the same patchable-function-prefix.
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
index 9ad0418025e56c..dacecf82da6aae 100644
--- a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
@@ -3,10 +3,10 @@
 ; RUN: mkdir -p %t && cd %t
 ; RUN: opt < %s -S -passes=insert-gcov-profiling \
 ; RUN:  -mtriple=x86_64-unknown-linux-gnu | FileCheck \
-; RUN:  --check-prefixes=CHECK,CHECK-CTOR-INIT %s
+; RUN:  --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
 ; RUN: opt < %s -S -passes=insert-gcov-profiling \
 ; RUN:  -mtriple=powerpc64-ibm-aix | FileCheck \
-; RUN:  --check-prefixes=CHECK,CHECK-RT-INIT %s
+; RUN:  --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s
 
 ; Check for gcov initialization function pointers when we initialize
 ; the writeout and reset functions in the runtime.
@@ -39,4 +39,5 @@ entry:
 ; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
 ; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]
 
-; CHECK: ![[#TYPE]] = !{i32 -440107680}
+; CHECK-PPC: ![[#TYPE]] = !{i32 -440107680}
+; CHECK-X86: ![[#TYPE]] = !{i32 774105856}
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi.ll b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
index 5e0e91fc92f5f7..c0e5722e08d743 100644
--- a/llvm/test/Transforms/GCOVProfiling/kcfi.ll
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
@@ -2,10 +2,10 @@
 ; RUN: mkdir -p %t && cd %t
 ; RUN: opt < %s -S -passes=insert-gcov-profiling \
 ; RUN:  -mtriple=x86_64-unknown-linux-gnu | FileCheck \
-; RUN:  --check-prefixes=CHECK,CHECK-CTOR-INIT %s
+; RUN:  --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
 ; RUN: opt < %s -S -passes=insert-gcov-profiling \
 ; RUN:  -mtriple=powerpc64-ibm-aix | FileCheck \
-; RUN:  --check-prefixes=CHECK,CHECK-RT-INIT %s
+; RUN:  --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s
 
 ; Check for gcov initialization function pointers when we initialize
 ; the writeout and reset functions in the runtime.
@@ -37,4 +37,5 @@ entry:
 ; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
 ; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]
 
-; CHECK: ![[#TYPE]] = !{i32 -1522505972}
+; CHECK-PPC: ![[#TYPE]] = !{i32 -1522505972}
+; CHECK-X86: ![[#TYPE]] = !{i32 704854112}



More information about the llvm-commits mailing list