[llvm] de02a75 - [PGO] Fix computation of function Hash

via llvm-commits llvm-commits at lists.llvm.org
Wed May 27 00:15:36 PDT 2020


Author: serge-sans-paille
Date: 2020-05-27T09:15:21+02:00
New Revision: de02a75e398415bad4df27b4547c25b896c8bf3b

URL: https://github.com/llvm/llvm-project/commit/de02a75e398415bad4df27b4547c25b896c8bf3b
DIFF: https://github.com/llvm/llvm-project/commit/de02a75e398415bad4df27b4547c25b896c8bf3b.diff

LOG: [PGO] Fix computation of function Hash

And bump its version number accordingly.

This is a patched recommit of 7c298c104bfe725d4315926a656263e8a5ac3054

Previous hash implementation was incorrectly passing an uint64_t, that got converted
to an uint8_t, to finalize the hash computation. This led to different functions
having the same hash if they only differ by the remaining statements, which is
incorrect.

Added a new test case that trivially tests that a small function change is
reflected in the hash value.

Not that as this patch fixes the hash computation, it would invalidate all hashes
computed before that patch applies, this is why we bumped the version number.

Update profile data hash entries due to hash function update, except for binary
version, in which case we keep the buggy behavior for backward compatibility.

Differential Revision: https://reviews.llvm.org/D79961

Added: 
    clang/test/Profile/Inputs/c-general.profdata.v5
    clang/test/Profile/c-collision.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/CodeGen/CodeGenPGO.cpp
    clang/test/Profile/Inputs/c-counter-overflows.proftext
    clang/test/Profile/Inputs/c-general.proftext
    clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
    clang/test/Profile/Inputs/cxx-rangefor.proftext
    clang/test/Profile/Inputs/cxx-throws.proftext
    clang/test/Profile/Inputs/misexpect-switch-default.proftext
    clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
    clang/test/Profile/Inputs/misexpect-switch.proftext
    clang/test/Profile/c-general.c
    llvm/include/llvm/ProfileData/InstrProf.h
    llvm/include/llvm/ProfileData/InstrProfData.inc

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c38ff0e36790..571b54904754 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -82,6 +82,10 @@ Non-comprehensive list of changes in this release
   linker. If the user links the program with the ``clang`` or ``clang-cl``
   drivers, the driver will pass this flag for them.
 
+- Clang's profile files generated through ``-fprofile-instr-generate`` are using
+  a fixed hashing algorithm that prevents some collision when loading
+  out-of-date profile informations. Clang can still read old profile files.
+
 New Compiler Flags
 ------------------
 

diff  --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 3c91a04d5464..e810f608ab78 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -52,9 +52,10 @@ void CodeGenPGO::setFuncName(llvm::Function *Fn) {
 enum PGOHashVersion : unsigned {
   PGO_HASH_V1,
   PGO_HASH_V2,
+  PGO_HASH_V3,
 
   // Keep this set to the latest hash version.
-  PGO_HASH_LATEST = PGO_HASH_V2
+  PGO_HASH_LATEST = PGO_HASH_V3
 };
 
 namespace {
@@ -122,7 +123,7 @@ class PGOHash {
     BinaryOperatorGE,
     BinaryOperatorEQ,
     BinaryOperatorNE,
-    // The preceding values are available with PGO_HASH_V2.
+    // The preceding values are available since PGO_HASH_V2.
 
     // Keep this last.  It's for the static assert that follows.
     LastHashType
@@ -144,7 +145,9 @@ static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
                                         CodeGenModule &CGM) {
   if (PGOReader->getVersion() <= 4)
     return PGO_HASH_V1;
-  return PGO_HASH_V2;
+  if (PGOReader->getVersion() <= 5)
+    return PGO_HASH_V2;
+  return PGO_HASH_V3;
 }
 
 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
@@ -288,7 +291,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
         return PGOHash::BinaryOperatorLAnd;
       if (BO->getOpcode() == BO_LOr)
         return PGOHash::BinaryOperatorLOr;
-      if (HashVersion == PGO_HASH_V2) {
+      if (HashVersion >= PGO_HASH_V2) {
         switch (BO->getOpcode()) {
         default:
           break;
@@ -310,7 +313,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
     }
     }
 
-    if (HashVersion == PGO_HASH_V2) {
+    if (HashVersion >= PGO_HASH_V2) {
       switch (S->getStmtClass()) {
       default:
         break;
@@ -747,13 +750,21 @@ uint64_t PGOHash::finalize() {
     return Working;
 
   // Check for remaining work in Working.
-  if (Working)
-    MD5.update(Working);
+  if (Working) {
+    // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
+    // is buggy because it converts a uint64_t into an array of uint8_t.
+    if (HashVersion < PGO_HASH_V3) {
+      MD5.update({(uint8_t)Working});
+    } else {
+      using namespace llvm::support;
+      uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+      MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+    }
+  }
 
   // Finalize the MD5 and return the hash.
   llvm::MD5::MD5Result Result;
   MD5.final(Result);
-  using namespace llvm::support;
   return Result.low();
 }
 

diff  --git a/clang/test/Profile/Inputs/c-counter-overflows.proftext b/clang/test/Profile/Inputs/c-counter-overflows.proftext
index b2e5dd1d77ae..4d0287c78705 100644
--- a/clang/test/Profile/Inputs/c-counter-overflows.proftext
+++ b/clang/test/Profile/Inputs/c-counter-overflows.proftext
@@ -1,5 +1,5 @@
 main
-10111551811706059223
+7779561829442898616
 8
 1
 68719476720

diff  --git a/clang/test/Profile/Inputs/c-general.profdata.v5 b/clang/test/Profile/Inputs/c-general.profdata.v5
new file mode 100644
index 000000000000..435ef2b6ef1d
Binary files /dev/null and b/clang/test/Profile/Inputs/c-general.profdata.v5 
diff er

diff  --git a/clang/test/Profile/Inputs/c-general.proftext b/clang/test/Profile/Inputs/c-general.proftext
index c0d03b4b755e..18ad2050fe1a 100644
--- a/clang/test/Profile/Inputs/c-general.proftext
+++ b/clang/test/Profile/Inputs/c-general.proftext
@@ -7,7 +7,7 @@ simple_loops
 75
 
 conditionals
-4190663230902537370
+4904767535850050386
 11
 1
 100
@@ -22,7 +22,7 @@ conditionals
 100
 
 early_exits
-8265526549255474475
+2880354649761471549
 9
 1
 0
@@ -35,7 +35,7 @@ early_exits
 0
 
 jumps
-15872630527555456493
+15051420506203462683
 22
 1
 1
@@ -61,7 +61,7 @@ jumps
 9
 
 switches
-11892326508727782373
+43242458792028222
 19
 1
 1
@@ -84,7 +84,7 @@ switches
 0
 
 big_switch
-16933280399284440835
+13144136522122330070
 17
 1
 32
@@ -117,7 +117,7 @@ boolean_operators
 50
 
 boolop_loops
-11270260636676715317
+12402604614320574815
 9
 1
 50
@@ -137,7 +137,7 @@ conditional_operator
 1
 
 do_fallthrough
-6898770640283947069
+8714614136504380050
 4
 1
 10

diff  --git a/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext b/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
index ef7f653811fb..d880663fed32 100644
--- a/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
+++ b/clang/test/Profile/Inputs/c-unprofiled-blocks.proftext
@@ -1,5 +1,5 @@
 never_called
-5644096560937528444
+6820425066224770721
 9
 0
 0
@@ -17,7 +17,7 @@ main
 1
 
 dead_code
-9636018207904213947
+5254464978620792806
 10
 1
 0

diff  --git a/clang/test/Profile/Inputs/cxx-rangefor.proftext b/clang/test/Profile/Inputs/cxx-rangefor.proftext
index b59729207859..d41205bbde14 100644
--- a/clang/test/Profile/Inputs/cxx-rangefor.proftext
+++ b/clang/test/Profile/Inputs/cxx-rangefor.proftext
@@ -1,5 +1,5 @@
 _Z9range_forv
-6169071350249721981
+8789831523895825398
 5
 1
 4

diff  --git a/clang/test/Profile/Inputs/cxx-throws.proftext b/clang/test/Profile/Inputs/cxx-throws.proftext
index 32fcf5d50cd4..043dea08c728 100644
--- a/clang/test/Profile/Inputs/cxx-throws.proftext
+++ b/clang/test/Profile/Inputs/cxx-throws.proftext
@@ -1,5 +1,5 @@
 _Z6throwsv
-340120998528097520
+18172607911962830854
 9
 1
 100

diff  --git a/clang/test/Profile/Inputs/misexpect-switch-default.proftext b/clang/test/Profile/Inputs/misexpect-switch-default.proftext
index 7b2d59781a1d..533da9176523 100644
--- a/clang/test/Profile/Inputs/misexpect-switch-default.proftext
+++ b/clang/test/Profile/Inputs/misexpect-switch-default.proftext
@@ -1,6 +1,6 @@
 main
 # Func Hash:
-8712453512413296413
+8734802134600123338
 # Num Counters:
 9
 # Counter Values:

diff  --git a/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext b/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
index 52b7b70cab9a..8e8db667d329 100644
--- a/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
+++ b/clang/test/Profile/Inputs/misexpect-switch-nonconst.proftext
@@ -1,6 +1,6 @@
 main
 # Func Hash:
-1965403898329309329
+3721743393642630379
 # Num Counters:
 10
 # Counter Values:

diff  --git a/clang/test/Profile/Inputs/misexpect-switch.proftext b/clang/test/Profile/Inputs/misexpect-switch.proftext
index ce4c96b3e3a6..ce41cd0552d3 100644
--- a/clang/test/Profile/Inputs/misexpect-switch.proftext
+++ b/clang/test/Profile/Inputs/misexpect-switch.proftext
@@ -1,6 +1,6 @@
 main
 # Func Hash:
-1965403898329309329
+872687477373597607
 # Num Counters:
 9
 # Counter Values:

diff  --git a/clang/test/Profile/c-collision.c b/clang/test/Profile/c-collision.c
new file mode 100644
index 000000000000..fabecd752b4e
--- /dev/null
+++ b/clang/test/Profile/c-collision.c
@@ -0,0 +1,22 @@
+// Test that a slight change in the code leads to a 
diff erent hash.
+// RUN: %clang_cc1 -UEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-NOEXTRA
+// RUN: %clang_cc1 -DEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-EXTRA
+
+// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 7156072912471487002,
+// CHECK-EXTRA:   @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 -4383447408116050035,
+
+extern int bar;
+void foo() {
+  if (bar) {
+  }
+  if (bar) {
+  }
+  if (bar) {
+    if (bar) {
+#ifdef EXTRA
+      if (bar) {
+      }
+#endif
+    }
+  }
+}

diff  --git a/clang/test/Profile/c-general.c b/clang/test/Profile/c-general.c
index 22b4288a5fd6..a7f03e872881 100644
--- a/clang/test/Profile/c-general.c
+++ b/clang/test/Profile/c-general.c
@@ -4,6 +4,7 @@
 
 // RUN: llvm-profdata merge %S/Inputs/c-general.proftext -o %t.profdata
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata | FileCheck -allow-deprecated-dag-overlap  -check-prefix=PGOUSE %s
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v5 | FileCheck -allow-deprecated-dag-overlap  -check-prefix=PGOUSE %s
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v3 | FileCheck -allow-deprecated-dag-overlap  -check-prefix=PGOUSE %s
 // Also check compatibility with older profiles.
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v1 | FileCheck -allow-deprecated-dag-overlap  -check-prefix=PGOUSE %s

diff  --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index cdd50d2d5ebc..62a0c6955708 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -979,6 +979,9 @@ enum ProfVersion {
   Version4 = 4,
   // In this version, the frontend PGO stable hash algorithm defaults to V2.
   Version5 = 5,
+  // In this version, the frontend PGO stable hash algorithm got fixed and
+  // may produce hashes 
diff erent from Version5.
+  Version6 = 6,
   // The current version is 5.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };

diff  --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 5e5f4ff941f3..a6913527e67f 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -657,7 +657,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 5
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 5
+#define INSTR_PROF_INDEX_VERSION 6
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 3
 


        


More information about the llvm-commits mailing list