[clang-tools-extra] [llvm-profdata] Do not create numerical strings for MD5 function names read from a Sample Profile. (PR #66164)
Matthias Braun via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 4 11:52:43 PDT 2023
================
@@ -0,0 +1,222 @@
+//===--- ProfileFuncRef.h - Sample profile function name ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ProfileFuncRef class. It represents function names in
+// a sample profile, which can be in one of two forms - either a regular
+// string, or a 64-bit hash code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_PROFILEFUNCREF_H
+#define LLVM_PROFILEDATA_PROFILEFUNCREF_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+namespace sampleprof {
+
+/// This class represents a function name that is read from a sample profile. It
+/// comes in two forms: a string or a hash code. For efficient storage, a
+/// sample profile may store function names as 64-bit MD5 values, so when
+/// reading the profile, this class can represent them without converting them
+/// to strings first.
+/// When representing a hash code, the LengthOrHashCode field stores it and
+/// Data is set to null. When representing a string, Data and LengthOrHashCode
+/// hold the same (pointer, length) pair a StringRef would.
+/// Implicit conversion to StringRef is disallowed because there are too many
+/// places where it could silently break code, such as using it in a StringMap.
+class ProfileFuncRef {
+
+  /// Start of the referenced characters, or null when this object holds a
+  /// hash code (or is default-constructed).
+  const char *Data = nullptr;
+
+  /// String length when Data is non-null, otherwise the hash code.
+  /// Use uint64_t instead of size_t so that it can also hold a MD5 value.
+  uint64_t LengthOrHashCode = 0;
+
+  /// Extension to memcmp to handle hash code representation. If both are hash
+  /// values, Lhs and Rhs are both null, function returns 0 (and needs an extra
+  /// comparison of the integer values by the caller). If only one is a hash
+  /// code, it is considered less than the StringRef one. Otherwise perform
+  /// normal string comparison over the first \p Length bytes.
+  static int compareMemory(const char *Lhs, const char *Rhs, uint64_t Length) {
+    // Covers both "two hash codes" (null == null) and "same buffer".
+    if (Lhs == Rhs)
+      return 0;
+    if (!Lhs)
+      return -1;
+    if (!Rhs)
+      return 1;
+    return ::memcmp(Lhs, Rhs, static_cast<size_t>(Length));
+  }
+
+public:
+  /// Construct an empty object: null Data and a zero LengthOrHashCode.
+  ProfileFuncRef() = default;
+
+  /// Constructor from a StringRef. Check if Str is a number, which is generated
+  /// by converting a MD5 sample profile to a format that does not support MD5,
+  /// and if so, convert the numerical string to a hash code first. We assume
+  /// that no function name (from a profile) can be a pure number.
+  /// Note that a string that parses to zero therefore yields an object for
+  /// which empty() is true and isStringRef() is false.
+  explicit ProfileFuncRef(StringRef Str)
+      : Data(Str.data()), LengthOrHashCode(Str.size()) {
+    // Only need to check for base 10 digits, fail faster if otherwise. The
+    // cast to unsigned char is required: handing isdigit a negative char
+    // value (e.g. a byte of a UTF-8 sequence) is undefined behavior.
+    // NOTE(review): this relies on getAsInteger leaving LengthOrHashCode
+    // untouched when parsing fails - confirm against StringRef.
+    if (!Str.empty() && isdigit(static_cast<unsigned char>(Str[0])) &&
+        !Str.getAsInteger(10, LengthOrHashCode))
+      Data = nullptr;
+  }
+
+  /// Constructor from a hash code. Zero is rejected by the assertion because
+  /// a null Data with a zero value is the empty representation.
+  explicit ProfileFuncRef(uint64_t HashCode)
+      : Data(nullptr), LengthOrHashCode(HashCode) {
+    assert(HashCode != 0 && "hash code 0 is reserved for the empty value");
+  }
+
+  /// Check for equality. Similar to StringRef::equals, but will also cover
+  /// the case where one or both are hash codes, in which case comparing their
+  /// int values is sufficient. A hash code ProfileFuncRef is considered not
+  /// equal to a StringRef ProfileFuncRef regardless of actual contents.
+  bool equals(const ProfileFuncRef &Other) const {
+    return LengthOrHashCode == Other.LengthOrHashCode &&
+           compareMemory(Data, Other.Data, LengthOrHashCode) == 0;
+  }
+
+  /// Total order comparison. If both ProfileFuncRef are StringRef, this is the
+  /// same as StringRef::compare. If one of them is StringRef, it is considered
+  /// greater than the hash code ProfileFuncRef. Otherwise this is the same
+  /// as comparing their int values.
+  /// \returns a negative value, zero, or a positive value when this object
+  /// orders before, equal to, or after \p Other.
+  int compare(const ProfileFuncRef &Other) const {
+    const int Res = compareMemory(
+        Data, Other.Data, std::min(LengthOrHashCode, Other.LengthOrHashCode));
+    if (Res != 0)
+      return Res;
+    // Common prefix (or two hash codes): the length / hash value decides.
+    if (LengthOrHashCode == Other.LengthOrHashCode)
+      return 0;
+    return LengthOrHashCode < Other.LengthOrHashCode ? -1 : 1;
+  }
+
+  /// Convert to a string, usually for output purpose. A hash code is rendered
+  /// as its decimal value; an empty object yields an empty string.
+  std::string str() const {
+    if (Data)
+      return std::string(Data, LengthOrHashCode);
+    if (LengthOrHashCode != 0)
+      return std::to_string(LengthOrHashCode);
+    return std::string();
+  }
+
+  /// Convert to StringRef, a backing buffer must be provided in case this
+  /// object represents a hash code, which will be converted to a string into
+  /// the buffer. If this object represents a StringRef, the buffer is not used.
+  /// When the buffer is used, the returned StringRef refers to \p Buffer.
+  StringRef stringRef(std::string &Buffer) const {
+    if (Data)
+      return StringRef(Data, LengthOrHashCode);
+    if (LengthOrHashCode != 0) {
+      Buffer = std::to_string(LengthOrHashCode);
+      return Buffer;
+    }
+    return StringRef();
+  }
+
+  friend raw_ostream &operator<<(raw_ostream &OS, const ProfileFuncRef &Obj);
+
+  /// Get hash code of this object. Returns this object's hash code if it is
+  /// already representing one, otherwise returns the MD5 of its string content.
+  /// Note that it is not the same as std::hash because we want to keep the
+  /// consistency that the same sample profile function in string form or MD5
+  /// form has the same hash code.
+  uint64_t getHashCode() const {
+    if (Data)
+      return MD5Hash(StringRef(Data, LengthOrHashCode));
+    return LengthOrHashCode;
+  }
+
+  /// True when the stored length / hash value is zero.
+  bool empty() const { return LengthOrHashCode == 0; }
+
+  /// Check if this object represents a StringRef, or a hash code.
+  bool isStringRef() const { return Data != nullptr; }
+};
+
+/// Equality operator; forwards to ProfileFuncRef::equals.
+inline bool operator==(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const bool Same = LHS.equals(RHS);
+  return Same;
+}
+
+/// Inequality operator; the negation of ProfileFuncRef::equals.
+inline bool operator!=(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const bool Same = LHS.equals(RHS);
+  return !Same;
+}
+
+/// Less-than operator; true when ProfileFuncRef::compare orders \p LHS
+/// strictly before \p RHS.
+inline bool operator<(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const int Order = LHS.compare(RHS);
+  return Order < 0;
+}
+
+/// Less-or-equal operator; true when ProfileFuncRef::compare does not order
+/// \p LHS after \p RHS.
+inline bool operator<=(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const int Order = LHS.compare(RHS);
+  return Order <= 0;
+}
+
+/// Greater-than operator; true when ProfileFuncRef::compare orders \p LHS
+/// strictly after \p RHS.
+inline bool operator>(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const int Order = LHS.compare(RHS);
+  return Order > 0;
+}
+
+/// Greater-or-equal operator; true when ProfileFuncRef::compare does not
+/// order \p LHS before \p RHS.
+inline bool operator>=(const ProfileFuncRef &LHS, const ProfileFuncRef &RHS) {
+  const int Order = LHS.compare(RHS);
+  return Order >= 0;
+}
+
+/// Stream \p Obj into \p OS. A string-form name is written as its characters,
+/// a non-zero hash code as its integer value, and an empty object writes
+/// nothing. Always returns \p OS.
+inline raw_ostream &operator<<(raw_ostream &OS, const ProfileFuncRef &Obj) {
+  if (Obj.isStringRef())
+    OS << StringRef(Obj.Data, Obj.LengthOrHashCode);
+  else if (!Obj.empty())
+    OS << Obj.LengthOrHashCode;
+  return OS;
+}
+
+inline uint64_t MD5Hash(const sampleprof::ProfileFuncRef &Obj) {
----------------
MatzeB wrote:
no need for explicit `sampleprof::`.
https://github.com/llvm/llvm-project/pull/66164
More information about the cfe-commits
mailing list