[clang] fd88089 - -fsanitize=vptr: Change hash function and simplify bit mixer

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 19 22:39:58 PDT 2024


Author: Fangrui Song
Date: 2024-06-19T22:39:54-07:00
New Revision: fd88089151e66a4cb1d90aaa224e4cb4e7a748f4

URL: https://github.com/llvm/llvm-project/commit/fd88089151e66a4cb1d90aaa224e4cb4e7a748f4
DIFF: https://github.com/llvm/llvm-project/commit/fd88089151e66a4cb1d90aaa224e4cb4e7a748f4.diff

LOG: -fsanitize=vptr: Change hash function and simplify bit mixer

llvm::hash_value is not guaranteed to be deterministic. Use the
deterministic xxh3_64bits. A strong bit mixer isn't necessary. Use a
simpler one that works well with pointers.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/test/CodeGenCXX/catch-undef-behavior.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index b6718a46e8c50..a5811be2cc02e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -650,16 +650,13 @@ unsigned CodeGenFunction::getAccessedFieldNo(unsigned Idx,
       ->getZExtValue();
 }
 
-/// Emit the hash_16_bytes function from include/llvm/ADT/Hashing.h.
-static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low,
-                                    llvm::Value *High) {
-  llvm::Value *KMul = Builder.getInt64(0x9ddfea08eb382d69ULL);
-  llvm::Value *K47 = Builder.getInt64(47);
-  llvm::Value *A0 = Builder.CreateMul(Builder.CreateXor(Low, High), KMul);
-  llvm::Value *A1 = Builder.CreateXor(Builder.CreateLShr(A0, K47), A0);
-  llvm::Value *B0 = Builder.CreateMul(Builder.CreateXor(High, A1), KMul);
-  llvm::Value *B1 = Builder.CreateXor(Builder.CreateLShr(B0, K47), B0);
-  return Builder.CreateMul(B1, KMul);
+static llvm::Value *emitHashMix(CGBuilderTy &Builder, llvm::Value *Acc,
+                                llvm::Value *Ptr) {
+  llvm::Value *A0 =
+      Builder.CreateMul(Ptr, Builder.getInt64(0xbf58476d1ce4e5b9u));
+  llvm::Value *A1 =
+      Builder.CreateXor(A0, Builder.CreateLShr(A0, Builder.getInt64(31)));
+  return Builder.CreateXor(Acc, A1);
 }
 
 bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) {
@@ -821,11 +818,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
       EmitBlock(VptrNotNull);
     }
 
-    // Compute a hash of the mangled name of the type.
-    //
-    // FIXME: This is not guaranteed to be deterministic! Move to a
-    //        fingerprinting mechanism once LLVM provides one. For the time
-    //        being the implementation happens to be deterministic.
+    // Compute a deterministic hash of the mangled name of the type.
     SmallString<64> MangledName;
     llvm::raw_svector_ostream Out(MangledName);
     CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty.getUnqualifiedType(),
@@ -834,15 +827,13 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
     // Contained in NoSanitizeList based on the mangled type.
     if (!CGM.getContext().getNoSanitizeList().containsType(SanitizerKind::Vptr,
                                                            Out.str())) {
-      llvm::hash_code TypeHash = hash_value(Out.str());
-
-      // Load the vptr, and compute hash_16_bytes(TypeHash, vptr).
-      llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash);
+      // Load the vptr, and mix it with TypeHash.
+      llvm::Value *TypeHash =
+          llvm::ConstantInt::get(Int64Ty, xxh3_64bits(Out.str()));
       Address VPtrAddr(Ptr, IntPtrTy, getPointerAlign());
       llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr);
-      llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty);
-
-      llvm::Value *Hash = emitHash16Bytes(Builder, Low, High);
+      llvm::Value *Hash =
+          emitHashMix(Builder, TypeHash, Builder.CreateZExt(VPtrVal, Int64Ty));
       Hash = Builder.CreateTrunc(Hash, IntPtrTy);
 
       // Look the hash up in our cache.

diff  --git a/clang/test/CodeGenCXX/catch-undef-behavior.cpp b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
index 6fd7d16f86369..1a0e98a661375 100644
--- a/clang/test/CodeGenCXX/catch-undef-behavior.cpp
+++ b/clang/test/CodeGenCXX/catch-undef-behavior.cpp
@@ -67,15 +67,10 @@ void member_access(S *p) {
   // The two hash values are for 64- and 32-bit Clang binaries, respectively.
   // FIXME: We should produce a 64-bit value either way.
   //
-  // CHECK-NEXT: xor i64 {{-4030275160588942838|1107558922}}, %[[VPTR]]
-  // CHECK-NEXT: mul i64 {{.*}}, -7070675565921424023
-  // CHECK-NEXT: lshr i64 {{.*}}, 47
-  // CHECK-NEXT: xor i64
-  // CHECK-NEXT: xor i64 %[[VPTR]]
-  // CHECK-NEXT: mul i64 {{.*}}, -7070675565921424023
-  // CHECK-NEXT: lshr i64 {{.*}}, 47
-  // CHECK-NEXT: xor i64
-  // CHECK-NEXT: %[[HASH:.*]] = mul i64 {{.*}}, -7070675565921424023
+  // CHECK-NEXT: mul i64 %[[VPTR]], -4658895280553007687, !nosanitize
+  // CHECK-NEXT: lshr i64 {{.*}}, 31
+  // CHECK-NEXT: xor i64 %[[#]], %[[#]]
+  // CHECK-NEXT: %[[HASH:.*]] = xor i64 4589795628539611399, %[[#]], !nosanitize
   //
   // Check the hash against the table:
   //
@@ -116,7 +111,7 @@ void member_access(S *p) {
   // (3b) Check that 'p' actually points to an 'S'
 
   // CHECK: load i64, ptr
-  // CHECK-NEXT: xor i64 {{-4030275160588942838|1107558922}},
+  // CHECK-NEXT: mul i64 %[[#]], -4658895280553007687, !nosanitize
   // [...]
   // CHECK: getelementptr inbounds [128 x i64], ptr @__ubsan_vptr_type_cache, i32 0, i64 %
   // CHECK: br i1


        


More information about the cfe-commits mailing list