[llvm] ebe09e2 - [FSAFDO] Improve FS discriminator encoding

Rong Xu via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 9 23:19:41 PST 2023


Author: Rong Xu
Date: 2023-03-09T23:18:48-08:00
New Revision: ebe09e2a9556c411809f6c27f555299273442664

URL: https://github.com/llvm/llvm-project/commit/ebe09e2a9556c411809f6c27f555299273442664
DIFF: https://github.com/llvm/llvm-project/commit/ebe09e2a9556c411809f6c27f555299273442664.diff

LOG: [FSAFDO] Improve FS discriminator encoding

This change improves FS discriminators in the following ways:
(1) use call-stack debug information to generate
discriminators: the same (src/line) DILs can now have the same
discriminator value if they come from different call-stacks.
This effectively increases the usable discriminator values
for each round of FS discriminator pass.
(2) don't generate the FS discriminator for meta instructions
(i.e. instructions not emitted). This reduces the number of
discriminator conflicts (for the case we run out of discriminator
bits for that pass).
(3) use the less expensive xxHash64 hashing.

These improvements should bring better performance for FSAFDO
and they should be used by default. But this change creates
incompatible FS discriminators. For the iterative profile users,
they might see a performance drop in the first release with
this change (due to the fact that the profiles have the old
discriminators and the compiler uses the new discriminator).
We have measured that this is not more than 1.5% on several
benchmarks. Note the degradation should be gone in the second
release and one should expect a performance gain over the binary
without this change.

One possible solution to the iterative profile issue would be
separating discriminators for profile-use and the ones emitted to
the binary. This would require a mechanism to allow two sets of
discriminators to be maintained and then phasing out the first
approach. This is too much churn in the compiler and the
performance implications do not seem to be worth the effort.

Instead, we put the changes under an option so iterative profile
users can do a gradual rollout of this change. We will change the
option's default value to true in a later patch and eventually
purge this option from the code base.

Differential Revision: https://reviews.llvm.org/D145171

Added: 
    llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo

Modified: 
    llvm/include/llvm/IR/DebugInfoMetadata.h
    llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
    llvm/lib/CodeGen/MIRFSDiscriminator.cpp
    llvm/lib/CodeGen/MIRSampleProfile.cpp
    llvm/lib/CodeGen/PseudoProbeInserter.cpp
    llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
    llvm/test/CodeGen/X86/fsafdo_test1.ll
    llvm/test/CodeGen/X86/fsafdo_test2.ll
    llvm/test/CodeGen/X86/fsafdo_test3.ll
    llvm/test/CodeGen/X86/fsafdo_test4.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index dd4de53bc091..fb0a194e0c34 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -1599,254 +1599,6 @@ class DILocalScope : public DIScope {
   }
 };
 
-/// Debug location.
-///
-/// A debug location in source code, used for debug info and otherwise.
-class DILocation : public MDNode {
-  friend class LLVMContextImpl;
-  friend class MDNode;
-
-  DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
-             unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
-  ~DILocation() { dropAllReferences(); }
-
-  static DILocation *getImpl(LLVMContext &Context, unsigned Line,
-                             unsigned Column, Metadata *Scope,
-                             Metadata *InlinedAt, bool ImplicitCode,
-                             StorageType Storage, bool ShouldCreate = true);
-  static DILocation *getImpl(LLVMContext &Context, unsigned Line,
-                             unsigned Column, DILocalScope *Scope,
-                             DILocation *InlinedAt, bool ImplicitCode,
-                             StorageType Storage, bool ShouldCreate = true) {
-    return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
-                   static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
-                   ShouldCreate);
-  }
-
-  TempDILocation cloneImpl() const {
-    // Get the raw scope/inlinedAt since it is possible to invoke this on
-    // a DILocation containing temporary metadata.
-    return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
-                        getRawInlinedAt(), isImplicitCode());
-  }
-
-public:
-  // Disallow replacing operands.
-  void replaceOperandWith(unsigned I, Metadata *New) = delete;
-
-  DEFINE_MDNODE_GET(DILocation,
-                    (unsigned Line, unsigned Column, Metadata *Scope,
-                     Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
-                    (Line, Column, Scope, InlinedAt, ImplicitCode))
-  DEFINE_MDNODE_GET(DILocation,
-                    (unsigned Line, unsigned Column, DILocalScope *Scope,
-                     DILocation *InlinedAt = nullptr,
-                     bool ImplicitCode = false),
-                    (Line, Column, Scope, InlinedAt, ImplicitCode))
-
-  /// Return a (temporary) clone of this.
-  TempDILocation clone() const { return cloneImpl(); }
-
-  unsigned getLine() const { return SubclassData32; }
-  unsigned getColumn() const { return SubclassData16; }
-  DILocalScope *getScope() const { return cast<DILocalScope>(getRawScope()); }
-
-  DILocation *getInlinedAt() const {
-    return cast_or_null<DILocation>(getRawInlinedAt());
-  }
-
-  /// Check if the location corresponds to an implicit code.
-  /// When the ImplicitCode flag is true, it means that the Instruction
-  /// with this DILocation has been added by the front-end but it hasn't been
-  /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing
-  /// bracket). It's useful for code coverage to not show a counter on "empty"
-  /// lines.
-  bool isImplicitCode() const { return SubclassData1; }
-  void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; }
-
-  DIFile *getFile() const { return getScope()->getFile(); }
-  StringRef getFilename() const { return getScope()->getFilename(); }
-  StringRef getDirectory() const { return getScope()->getDirectory(); }
-  std::optional<StringRef> getSource() const { return getScope()->getSource(); }
-
-  /// Get the scope where this is inlined.
-  ///
-  /// Walk through \a getInlinedAt() and return \a getScope() from the deepest
-  /// location.
-  DILocalScope *getInlinedAtScope() const {
-    if (auto *IA = getInlinedAt())
-      return IA->getInlinedAtScope();
-    return getScope();
-  }
-
-  /// Get the DWARF discriminator.
-  ///
-  /// DWARF discriminators distinguish identical file locations between
-  /// instructions that are on different basic blocks.
-  ///
-  /// There are 3 components stored in discriminator, from lower bits:
-  ///
-  /// Base discriminator: assigned by AddDiscriminators pass to identify IRs
-  ///                     that are defined by the same source line, but
-  ///                     different basic blocks.
-  /// Duplication factor: assigned by optimizations that will scale down
-  ///                     the execution frequency of the original IR.
-  /// Copy Identifier: assigned by optimizations that clones the IR.
-  ///                  Each copy of the IR will be assigned an identifier.
-  ///
-  /// Encoding:
-  ///
-  /// The above 3 components are encoded into a 32bit unsigned integer in
-  /// order. If the lowest bit is 1, the current component is empty, and the
-  /// next component will start in the next bit. Otherwise, the current
-  /// component is non-empty, and its content starts in the next bit. The
-  /// value of each components is either 5 bit or 12 bit: if the 7th bit
-  /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
-  /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
-  /// represent the component. Thus, the number of bits used for a component
-  /// is either 0 (if it and all the next components are empty); 1 - if it is
-  /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both
-  /// 0); or 14, if its value is up to and including 0x1ff. Note that the last
-  /// component is also capped at 0x1ff, even in the case when both first
-  /// components are 0, and we'd technically have 29 bits available.
-  ///
-  /// For precise control over the data being encoded in the discriminator,
-  /// use encodeDiscriminator/decodeDiscriminator.
-
-  inline unsigned getDiscriminator() const;
-
-  // For the regular discriminator, it stands for all empty components if all
-  // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by
-  // default). Here we fully leverage the higher 29 bits for pseudo probe use.
-  // This is the format:
-  // [2:0] - 0x7
-  // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole
-  // So if the lower 3 bits is non-zero and the others has at least one
-  // non-zero bit, it guarantees to be a pseudo probe discriminator
-  inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) {
-    return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8);
-  }
-
-  /// Returns a new DILocation with updated \p Discriminator.
-  inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
-
-  /// Returns a new DILocation with updated base discriminator \p BD. Only the
-  /// base discriminator is set in the new DILocation, the other encoded values
-  /// are elided.
-  /// If the discriminator cannot be encoded, the function returns std::nullopt.
-  inline std::optional<const DILocation *>
-  cloneWithBaseDiscriminator(unsigned BD) const;
-
-  /// Returns the duplication factor stored in the discriminator, or 1 if no
-  /// duplication factor (or 0) is encoded.
-  inline unsigned getDuplicationFactor() const;
-
-  /// Returns the copy identifier stored in the discriminator.
-  inline unsigned getCopyIdentifier() const;
-
-  /// Returns the base discriminator stored in the discriminator.
-  inline unsigned getBaseDiscriminator() const;
-
-  /// Returns a new DILocation with duplication factor \p DF * current
-  /// duplication factor encoded in the discriminator. The current duplication
-  /// factor is as defined by getDuplicationFactor().
-  /// Returns std::nullopt if encoding failed.
-  inline std::optional<const DILocation *>
-  cloneByMultiplyingDuplicationFactor(unsigned DF) const;
-
-  /// When two instructions are combined into a single instruction we also
-  /// need to combine the original locations into a single location.
-  /// When the locations are the same we can use either location.
-  /// When they differ, we need a third location which is distinct from either.
-  /// If they share a common scope, use this scope and compare the line/column
-  /// pair of the locations with the common scope:
-  /// * if both match, keep the line and column;
-  /// * if only the line number matches, keep the line and set the column as 0;
-  /// * otherwise set line and column as 0.
-  /// If they do not share a common scope the location is ambiguous and can't be
-  /// represented in a line entry. In this case, set line and column as 0 and
-  /// use the scope of any location.
-  ///
-  /// \p LocA \p LocB: The locations to be merged.
-  static const DILocation *getMergedLocation(const DILocation *LocA,
-                                             const DILocation *LocB);
-
-  /// Try to combine the vector of locations passed as input in a single one.
-  /// This function applies getMergedLocation() repeatedly left-to-right.
-  ///
-  /// \p Locs: The locations to be merged.
-  static const DILocation *
-  getMergedLocations(ArrayRef<const DILocation *> Locs);
-
-  /// Return the masked discriminator value for an input discrimnator value D
-  /// (i.e. zero out the (B+1)-th and above bits for D (B is 0-base).
-  // Example: an input of (0x1FF, 7) returns 0xFF.
-  static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
-    return (D & getN1Bits(B));
-  }
-
-  /// Return the bits used for base discriminators.
-  static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); }
-
-  /// Returns the base discriminator for a given encoded discriminator \p D.
-  static unsigned
-  getBaseDiscriminatorFromDiscriminator(unsigned D,
-                                        bool IsFSDiscriminator = false) {
-    if (IsFSDiscriminator)
-      return getMaskedDiscriminator(D, getBaseDiscriminatorBits());
-    return getUnsignedFromPrefixEncoding(D);
-  }
-
-  /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
-  /// have certain special case behavior (e.g. treating empty duplication factor
-  /// as the value '1').
-  /// This API, in conjunction with cloneWithDiscriminator, may be used to
-  /// encode the raw values provided.
-  ///
-  /// \p BD: base discriminator
-  /// \p DF: duplication factor
-  /// \p CI: copy index
-  ///
-  /// The return is std::nullopt if the values cannot be encoded in 32 bits -
-  /// for example, values for BD or DF larger than 12 bits. Otherwise, the
-  /// return is the encoded value.
-  static std::optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
-                                                     unsigned CI);
-
-  /// Raw decoder for values in an encoded discriminator D.
-  static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
-                                  unsigned &CI);
-
-  /// Returns the duplication factor for a given encoded discriminator \p D, or
-  /// 1 if no value or 0 is encoded.
-  static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
-    if (EnableFSDiscriminator)
-      return 1;
-    D = getNextComponentInDiscriminator(D);
-    unsigned Ret = getUnsignedFromPrefixEncoding(D);
-    if (Ret == 0)
-      return 1;
-    return Ret;
-  }
-
-  /// Returns the copy identifier for a given encoded discriminator \p D.
-  static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
-    return getUnsignedFromPrefixEncoding(
-        getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
-  }
-
-  Metadata *getRawScope() const { return getOperand(0); }
-  Metadata *getRawInlinedAt() const {
-    if (getNumOperands() == 2)
-      return getOperand(1);
-    return nullptr;
-  }
-
-  static bool classof(const Metadata *MD) {
-    return MD->getMetadataID() == DILocationKind;
-  }
-};
-
 /// Subprogram description.
 class DISubprogram : public DILocalScope {
   friend class LLVMContextImpl;
@@ -2115,6 +1867,266 @@ class DISubprogram : public DILocalScope {
   }
 };
 
+/// Debug location.
+///
+/// A debug location in source code, used for debug info and otherwise.
+class DILocation : public MDNode {
+  friend class LLVMContextImpl;
+  friend class MDNode;
+
+  DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
+             unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
+  ~DILocation() { dropAllReferences(); }
+
+  static DILocation *getImpl(LLVMContext &Context, unsigned Line,
+                             unsigned Column, Metadata *Scope,
+                             Metadata *InlinedAt, bool ImplicitCode,
+                             StorageType Storage, bool ShouldCreate = true);
+  static DILocation *getImpl(LLVMContext &Context, unsigned Line,
+                             unsigned Column, DILocalScope *Scope,
+                             DILocation *InlinedAt, bool ImplicitCode,
+                             StorageType Storage, bool ShouldCreate = true) {
+    return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
+                   static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
+                   ShouldCreate);
+  }
+
+  TempDILocation cloneImpl() const {
+    // Get the raw scope/inlinedAt since it is possible to invoke this on
+    // a DILocation containing temporary metadata.
+    return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
+                        getRawInlinedAt(), isImplicitCode());
+  }
+
+public:
+  // Disallow replacing operands.
+  void replaceOperandWith(unsigned I, Metadata *New) = delete;
+
+  DEFINE_MDNODE_GET(DILocation,
+                    (unsigned Line, unsigned Column, Metadata *Scope,
+                     Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
+                    (Line, Column, Scope, InlinedAt, ImplicitCode))
+  DEFINE_MDNODE_GET(DILocation,
+                    (unsigned Line, unsigned Column, DILocalScope *Scope,
+                     DILocation *InlinedAt = nullptr,
+                     bool ImplicitCode = false),
+                    (Line, Column, Scope, InlinedAt, ImplicitCode))
+
+  /// Return a (temporary) clone of this.
+  TempDILocation clone() const { return cloneImpl(); }
+
+  unsigned getLine() const { return SubclassData32; }
+  unsigned getColumn() const { return SubclassData16; }
+  DILocalScope *getScope() const { return cast<DILocalScope>(getRawScope()); }
+
+  /// Return the linkage name of Subprogram. If the linkage name is empty,
+  /// return scope name (the demangled name).
+  const StringRef getSubprogramLinkageName() const {
+    DISubprogram *SP = getScope()->getSubprogram();
+    if (!SP)
+      return "";
+    auto Name = SP->getLinkageName();
+    if (!Name.empty())
+      return Name;
+    return SP->getName();
+  }
+
+  DILocation *getInlinedAt() const {
+    return cast_or_null<DILocation>(getRawInlinedAt());
+  }
+
+  /// Check if the location corresponds to an implicit code.
+  /// When the ImplicitCode flag is true, it means that the Instruction
+  /// with this DILocation has been added by the front-end but it hasn't been
+  /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing
+  /// bracket). It's useful for code coverage to not show a counter on "empty"
+  /// lines.
+  bool isImplicitCode() const { return SubclassData1; }
+  void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; }
+
+  DIFile *getFile() const { return getScope()->getFile(); }
+  StringRef getFilename() const { return getScope()->getFilename(); }
+  StringRef getDirectory() const { return getScope()->getDirectory(); }
+  std::optional<StringRef> getSource() const { return getScope()->getSource(); }
+
+  /// Get the scope where this is inlined.
+  ///
+  /// Walk through \a getInlinedAt() and return \a getScope() from the deepest
+  /// location.
+  DILocalScope *getInlinedAtScope() const {
+    if (auto *IA = getInlinedAt())
+      return IA->getInlinedAtScope();
+    return getScope();
+  }
+
+  /// Get the DWARF discriminator.
+  ///
+  /// DWARF discriminators distinguish identical file locations between
+  /// instructions that are on different basic blocks.
+  ///
+  /// There are 3 components stored in discriminator, from lower bits:
+  ///
+  /// Base discriminator: assigned by AddDiscriminators pass to identify IRs
+  ///                     that are defined by the same source line, but
+  ///                     different basic blocks.
+  /// Duplication factor: assigned by optimizations that will scale down
+  ///                     the execution frequency of the original IR.
+  /// Copy Identifier: assigned by optimizations that clones the IR.
+  ///                  Each copy of the IR will be assigned an identifier.
+  ///
+  /// Encoding:
+  ///
+  /// The above 3 components are encoded into a 32bit unsigned integer in
+  /// order. If the lowest bit is 1, the current component is empty, and the
+  /// next component will start in the next bit. Otherwise, the current
+  /// component is non-empty, and its content starts in the next bit. The
+  /// value of each components is either 5 bit or 12 bit: if the 7th bit
+  /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
+  /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
+  /// represent the component. Thus, the number of bits used for a component
+  /// is either 0 (if it and all the next components are empty); 1 - if it is
+  /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both
+  /// 0); or 14, if its value is up to and including 0x1ff. Note that the last
+  /// component is also capped at 0x1ff, even in the case when both first
+  /// components are 0, and we'd technically have 29 bits available.
+  ///
+  /// For precise control over the data being encoded in the discriminator,
+  /// use encodeDiscriminator/decodeDiscriminator.
+
+  inline unsigned getDiscriminator() const;
+
+  // For the regular discriminator, it stands for all empty components if all
+  // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by
+  // default). Here we fully leverage the higher 29 bits for pseudo probe use.
+  // This is the format:
+  // [2:0] - 0x7
+  // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole
+  // So if the lower 3 bits is non-zero and the others has at least one
+  // non-zero bit, it guarantees to be a pseudo probe discriminator
+  inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) {
+    return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8);
+  }
+
+  /// Returns a new DILocation with updated \p Discriminator.
+  inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
+
+  /// Returns a new DILocation with updated base discriminator \p BD. Only the
+  /// base discriminator is set in the new DILocation, the other encoded values
+  /// are elided.
+  /// If the discriminator cannot be encoded, the function returns std::nullopt.
+  inline std::optional<const DILocation *>
+  cloneWithBaseDiscriminator(unsigned BD) const;
+
+  /// Returns the duplication factor stored in the discriminator, or 1 if no
+  /// duplication factor (or 0) is encoded.
+  inline unsigned getDuplicationFactor() const;
+
+  /// Returns the copy identifier stored in the discriminator.
+  inline unsigned getCopyIdentifier() const;
+
+  /// Returns the base discriminator stored in the discriminator.
+  inline unsigned getBaseDiscriminator() const;
+
+  /// Returns a new DILocation with duplication factor \p DF * current
+  /// duplication factor encoded in the discriminator. The current duplication
+  /// factor is as defined by getDuplicationFactor().
+  /// Returns std::nullopt if encoding failed.
+  inline std::optional<const DILocation *>
+  cloneByMultiplyingDuplicationFactor(unsigned DF) const;
+
+  /// When two instructions are combined into a single instruction we also
+  /// need to combine the original locations into a single location.
+  /// When the locations are the same we can use either location.
+  /// When they differ, we need a third location which is distinct from either.
+  /// If they share a common scope, use this scope and compare the line/column
+  /// pair of the locations with the common scope:
+  /// * if both match, keep the line and column;
+  /// * if only the line number matches, keep the line and set the column as 0;
+  /// * otherwise set line and column as 0.
+  /// If they do not share a common scope the location is ambiguous and can't be
+  /// represented in a line entry. In this case, set line and column as 0 and
+  /// use the scope of any location.
+  ///
+  /// \p LocA \p LocB: The locations to be merged.
+  static const DILocation *getMergedLocation(const DILocation *LocA,
+                                             const DILocation *LocB);
+
+  /// Try to combine the vector of locations passed as input in a single one.
+  /// This function applies getMergedLocation() repeatedly left-to-right.
+  ///
+  /// \p Locs: The locations to be merged.
+  static const DILocation *
+  getMergedLocations(ArrayRef<const DILocation *> Locs);
+
+  /// Return the masked discriminator value for an input discrimnator value D
+  /// (i.e. zero out the (B+1)-th and above bits for D (B is 0-base).
+  // Example: an input of (0x1FF, 7) returns 0xFF.
+  static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
+    return (D & getN1Bits(B));
+  }
+
+  /// Return the bits used for base discriminators.
+  static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); }
+
+  /// Returns the base discriminator for a given encoded discriminator \p D.
+  static unsigned
+  getBaseDiscriminatorFromDiscriminator(unsigned D,
+                                        bool IsFSDiscriminator = false) {
+    if (IsFSDiscriminator)
+      return getMaskedDiscriminator(D, getBaseDiscriminatorBits());
+    return getUnsignedFromPrefixEncoding(D);
+  }
+
+  /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
+  /// have certain special case behavior (e.g. treating empty duplication factor
+  /// as the value '1').
+  /// This API, in conjunction with cloneWithDiscriminator, may be used to
+  /// encode the raw values provided.
+  ///
+  /// \p BD: base discriminator
+  /// \p DF: duplication factor
+  /// \p CI: copy index
+  ///
+  /// The return is std::nullopt if the values cannot be encoded in 32 bits -
+  /// for example, values for BD or DF larger than 12 bits. Otherwise, the
+  /// return is the encoded value.
+  static std::optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
+                                                     unsigned CI);
+
+  /// Raw decoder for values in an encoded discriminator D.
+  static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+                                  unsigned &CI);
+
+  /// Returns the duplication factor for a given encoded discriminator \p D, or
+  /// 1 if no value or 0 is encoded.
+  static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
+    if (EnableFSDiscriminator)
+      return 1;
+    D = getNextComponentInDiscriminator(D);
+    unsigned Ret = getUnsignedFromPrefixEncoding(D);
+    if (Ret == 0)
+      return 1;
+    return Ret;
+  }
+
+  /// Returns the copy identifier for a given encoded discriminator \p D.
+  static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
+    return getUnsignedFromPrefixEncoding(
+        getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
+  }
+
+  Metadata *getRawScope() const { return getOperand(0); }
+  Metadata *getRawInlinedAt() const {
+    if (getNumOperands() == 2)
+      return getOperand(1);
+    return nullptr;
+  }
+
+  static bool classof(const Metadata *MD) {
+    return MD->getMetadataID() == DILocationKind;
+  }
+};
+
 class DILexicalBlockBase : public DILocalScope {
 protected:
   DILexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage,

diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 3e75b4371033..0540a4c2ae02 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -32,11 +32,7 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
   SmallVector<InlineSite, 8> ReversedInlineStack;
   auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
   while (InlinedAt) {
-    const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
-    // Use linkage name for C++ if possible.
-    auto Name = SP->getLinkageName();
-    if (Name.empty())
-      Name = SP->getName();
+    auto Name = InlinedAt->getSubprogramLinkageName();
     // Use caching to avoid redundant md5 computation for build speed.
     uint64_t &CallerGuid = NameGuidMap[Name];
     if (!CallerGuid)

diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index ad8a17f25ec5..e2166a4b484a 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
 
 using namespace llvm;
@@ -30,6 +31,13 @@ using namespace sampleprofutil;
 
 #define DEBUG_TYPE "mirfs-discriminators"
 
+// TODO(xur): Remove this option and related code once we make true as the
+// default.
+cl::opt<bool> ImprovedFSDiscriminator(
+    "improved-fs-discriminator", cl::Hidden, cl::init(false),
+    cl::desc("New FS discriminators encoding (incompatible with the original "
+             "encoding)"));
+
 char MIRAddFSDiscriminators::ID = 0;
 
 INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
@@ -42,11 +50,12 @@ FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) {
   return new MIRAddFSDiscriminators(P);
 }
 
+// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator.
 // Compute a hash value using debug line number, and the line numbers from the
 // inline stack.
-static uint64_t getCallStackHash(const MachineBasicBlock &BB,
-                                 const MachineInstr &MI,
-                                 const DILocation *DIL) {
+static uint64_t getCallStackHashV0(const MachineBasicBlock &BB,
+                                   const MachineInstr &MI,
+                                   const DILocation *DIL) {
   auto updateHash = [](const StringRef &Str) -> uint64_t {
     if (Str.empty())
       return 0;
@@ -62,6 +71,19 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
   return Ret;
 }
 
+static uint64_t getCallStackHash(const DILocation *DIL) {
+  auto hashCombine = [](const uint64_t Seed, const uint64_t Val) {
+    std::hash<uint64_t> Hasher;
+    return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
+  };
+  uint64_t Ret = 0;
+  for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+    Ret = hashCombine(Ret, xxHash64(ArrayRef<uint8_t>(DIL->getLine())));
+    Ret = hashCombine(Ret, xxHash64(DIL->getSubprogramLinkageName()));
+  }
+  return Ret;
+}
+
 // Traverse the CFG and assign FD discriminators. If two instructions
 // have the same lineno and discriminator, but residing in different BBs,
 // the latter instruction will get a new discriminator value. The new
@@ -74,7 +96,8 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   bool Changed = false;
-  using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
+  using LocationDiscriminator =
+      std::tuple<StringRef, unsigned, unsigned, uint64_t>;
   using BBSet = DenseSet<const MachineBasicBlock *>;
   using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
   using LocationDiscriminatorCurrPassMap =
@@ -84,7 +107,12 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
   LocationDiscriminatorCurrPassMap LDCM;
 
   // Mask of discriminators before this pass.
-  unsigned BitMaskBefore = getN1Bits(LowBit);
+  // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator.
+  unsigned LowBitTemp = LowBit;
+  assert(LowBit > 0 && "LowBit in FSDiscriminator cannot be 0");
+  if (ImprovedFSDiscriminator)
+    LowBitTemp -= 1;
+  unsigned BitMaskBefore = getN1Bits(LowBitTemp);
   // Mask of discriminators including this pass.
   unsigned BitMaskNow = getN1Bits(HighBit);
   // Mask of discriminators for bits specific to this pass.
@@ -92,9 +120,14 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
   unsigned NumNewD = 0;
 
   LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
-                    << MF.getFunction().getName() << "\n");
+                    << MF.getFunction().getName() << " Highbit=" << HighBit
+                    << "\n");
+
   for (MachineBasicBlock &BB : MF) {
     for (MachineInstr &I : BB) {
+      if (ImprovedFSDiscriminator && I.isMetaInstruction()) {
+        continue;
+      }
       const DILocation *DIL = I.getDebugLoc().get();
       if (!DIL)
         continue;
@@ -102,7 +135,12 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
       if (LineNo == 0)
         continue;
       unsigned Discriminator = DIL->getDiscriminator();
-      LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator};
+      uint64_t CallStackHashVal = 0;
+      if (ImprovedFSDiscriminator)
+        CallStackHashVal = getCallStackHash(DIL);
+
+      LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator,
+                               CallStackHashVal};
       auto &BBMap = LDBM[LD];
       auto R = BBMap.insert(&BB);
       if (BBMap.size() == 1)
@@ -111,7 +149,8 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
       unsigned DiscriminatorCurrPass;
       DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
       DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
-      DiscriminatorCurrPass += getCallStackHash(BB, I, DIL);
+      if (!ImprovedFSDiscriminator)
+        DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL);
       DiscriminatorCurrPass &= BitMaskThisPass;
       unsigned NewD = Discriminator | DiscriminatorCurrPass;
       const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);

diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 81d86621b9df..a821ad4ccea4 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -58,6 +58,7 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
                                   cl::init(false),
                                   cl::desc("View BFI after MIR loader"));
 
+extern cl::opt<bool> ImprovedFSDiscriminator;
 char MIRProfileLoaderPass::ID = 0;
 
 INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
@@ -165,6 +166,11 @@ class MIRProfileLoader final
   unsigned HighBit;
 
   bool ProfileIsValid = true;
+  ErrorOr<uint64_t> getInstWeight(const MachineInstr &MI) override {
+    if (ImprovedFSDiscriminator && MI.isMetaInstruction())
+      return std::error_code();
+    return getInstWeightImpl(MI);
+  }
 };
 
 template <>

diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 86ea3ec67178..913e0035b046 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -128,10 +128,7 @@ class PseudoProbeInserter : public MachineFunctionPass {
 
 private:
   uint64_t getFuncGUID(Module *M, DILocation *DL) {
-    auto *SP = DL->getScope()->getSubprogram();
-    auto Name = SP->getLinkageName();
-    if (Name.empty())
-      Name = SP->getName();
+    auto Name = DL->getSubprogramLinkageName();
     return Function::getGUID(Name);
   }
 

diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index a819bd9fb6e0..7a40ddee8179 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -56,11 +56,7 @@ static uint64_t getCallStackHash(const DILocation *DIL) {
   while (InlinedAt) {
     Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
     Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
-    const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
-    // Use linkage name for C++ if possible.
-    auto Name = SP->getLinkageName();
-    if (Name.empty())
-      Name = SP->getName();
+    auto Name = InlinedAt->getSubprogramLinkageName();
     Hash ^= MD5Hash(Name);
     InlinedAt = InlinedAt->getInlinedAt();
   }

diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo
new file mode 100644
index 000000000000..dbb5dd0e8eca
--- /dev/null
+++ b/llvm/test/CodeGen/X86/Inputs/fsloader_v1.afdo
@@ -0,0 +1,35 @@
+work:42380966:1346190
+ 1: 1246499
+ 5: 1246499
+foo:28798256:4267
+ 0: 4267
+ 2.1: 255999
+ 4: 264627 bar:250018
+ 4.256: 269485 bar:278102
+ 4.512: 280297 bar:280933
+ 4.768: 278916 bar:267752
+ 5: 264627
+ 5.256: 269485
+ 5.512: 260670
+ 5.768: 278916
+ 6: 11541
+ 6.256: 278916 work:284547
+ 6.512: 260670 work:249428
+ 6.768: 11541
+ 7: 272442
+ 7.256: 283590
+ 7.512: 234082
+ 7.768: 279149
+ 8: 11541
+ 8.256: 283590 work:305061
+ 8.512: 279149 work:281368
+ 8.768: 234082 work:225786
+ 10: 4050
+bar:9504180:1076805
+ 2: 1056020
+ 3: 1056020
+main:20360:0
+ 0: 0
+ 2.1: 4045
+ 3: 4156 foo:4267
+ 5: 0

diff --git a/llvm/test/CodeGen/X86/fsafdo_test1.ll b/llvm/test/CodeGen/X86/fsafdo_test1.ll
index dae098dbd630..b5ae3915294c 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test1.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test1.ll
@@ -1,17 +1,20 @@
-; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,V01
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,V01
 ;
 ; Check that fs-afdo discriminators are generated.
-; CHECK: .loc    1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3
-; ChECK: .loc    1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
-; CHECK: .loc    1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5
-; CHECK: .loc    1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3
+; V01: .loc    1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3
+; V01: .loc    1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
+; V0: .loc    1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5
+; V0: .loc    1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3
+; V1: .loc    1 9 5 is_stmt 0 discriminator 258 # foo.c:9:5
+; V1: .loc    1 7 3 is_stmt 1 discriminator 258 # foo.c:7:3
 ; Check that variable __llvm_fs_discriminator__ is generated.
-; CHECK: .type   __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
-; CHECK: .section        .rodata,"a",@progbits
-; CHECK: .weak   __llvm_fs_discriminator__
-; CHECK: __llvm_fs_discriminator__:
-; CHECK: .byte   1
-; CHECK: .size   __llvm_fs_discriminator__, 1
+; V01: .type   __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
+; V01: .section        .rodata,"a",@progbits
+; V01: .weak   __llvm_fs_discriminator__
+; V01: __llvm_fs_discriminator__:
+; V01: .byte   1
+; V01: .size   __llvm_fs_discriminator__, 1
 
 target triple = "x86_64-unknown-linux-gnu"
 

diff --git a/llvm/test/CodeGen/X86/fsafdo_test2.ll b/llvm/test/CodeGen/X86/fsafdo_test2.ll
index c0ae5d2f3676..c473e0ccacbf 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test2.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test2.ll
@@ -1,7 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
-; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo
-; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,V01
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV0,LOADER
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,V01
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV1,LOADER
 ;
 ;;
 ;; C source code for the test (compiler at -O3):
@@ -41,18 +44,21 @@
 ;; }
 ;;
 ;; Check that fs-afdo discriminators are generated.
-; CHECK: .loc    1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9
-; CHECK: .loc    1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9
-; CHECK: .loc    1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9
-; CHECK: .loc    1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9
+; V01: .loc    1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9
+; V0: .loc    1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9
+; V0: .loc    1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9
+; V0: .loc    1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9
+; V1: .loc    1 23 9 is_stmt 0 discriminator 257 # unroll.c:23:9
+; V1: .loc    1 23 9 is_stmt 0 discriminator 513 # unroll.c:23:9
+; V1: .loc    1 23 9 is_stmt 0 discriminator 769 # unroll.c:23:9
 ;;
 ;; Check that variable __llvm_fs_discriminator__ is generated.
-; CHECK: .type   __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
-; CHECK: .section        .rodata,"a",@progbits
-; CHECK: .weak   __llvm_fs_discriminator__
-; CHECK: __llvm_fs_discriminator__:
-; CHECK: .byte   1
-; CHECK: .size   __llvm_fs_discriminator__, 1
+; V01: .type   __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
+; V01: .section        .rodata,"a",@progbits
+; V01: .weak   __llvm_fs_discriminator__
+; V01: __llvm_fs_discriminator__:
+; V01: .byte   1
+; V01: .size   __llvm_fs_discriminator__, 1
 
 ;; Check that new branch probs are generated.
 ; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
@@ -63,16 +69,19 @@
 ; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35%
 ; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00%
 ; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00%
-; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
-; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADERV0: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADERV1: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
+; LOADERV0: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADERV1: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
 ; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57%
 ; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43%
-; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
-; LOADER: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
+; LOADERV0: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
+; LOADERV1: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADERV0: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
+; LOADERV1: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
 ; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46%
 ; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54%
 
-
 target triple = "x86_64-unknown-linux-gnu"
 
 @sum = dso_local local_unnamed_addr global i32 0, align 4

diff --git a/llvm/test/CodeGen/X86/fsafdo_test3.ll b/llvm/test/CodeGen/X86/fsafdo_test3.ll
index 4a5ad040e932..bbcc3ff59ec3 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test3.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test3.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo
-; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefix=BFI
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV0
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false -print-machine-bfi -print-bfi-func-name=foo -print-before=fs-profile-loader -stop-after=fs-profile-loader < %s 2>&1 | FileCheck %s --check-prefixes=BFI,BFIV1
 ;
 ;;
 ;; C source code for the test (compiler at -O3):
@@ -63,7 +65,7 @@
 ;
 ; BFI: # *** IR Dump Before SampleFDO loader in MIR (fs-profile-loader) ***:
 ; BFI: # End machine code for function foo.
-;
+; BFI-EMPTY:
 ; BFI: block-frequency-info: foo
 ; BFI:  - BB0[entry]: float = 1.0, int = 8, count = 4268
 ; BFI:  - BB1[for.cond1.preheader]: float = 66.446, int = 531, count = 283289
@@ -75,11 +77,13 @@
 ; BFI:  - BB7[if.end.1]: float = 66.446, int = 531, count = 283289
 ; BFI:  - BB8[if.then7.1]: float = 66.446, int = 531, count = 283289
 ; BFI:  - BB9[if.end9.1]: float = 66.446, int = 531, count = 283289
-; BFI:  - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204
+; BFIV0:  - BB10[if.then.2]: float = 2.7041, int = 21, count = 11204
+; BFIV1:  - BB10[if.then.2]: float = 61.075, int = 488, count = 260348
 ; BFI:  - BB11[if.end.2]: float = 66.446, int = 531, count = 283289
 ; BFI:  - BB12[if.then7.2]: float = 65.405, int = 523, count = 279021
 ; BFI:  - BB13[if.end9.2]: float = 66.446, int = 531, count = 283289
-; BFI:  - BB14[if.then.3]: float = 61.075, int = 488, count = 260348
+; BFIV0:  - BB14[if.then.3]: float = 61.075, int = 488, count = 260348
+; BFIV1:  - BB14[if.then.3]: float = 2.7041, int = 21, count = 11204
 ; BFI:  - BB15[if.end.3]: float = 66.446, int = 531, count = 283289
 ; BFI:  - BB16[if.then7.3]: float = 54.846, int = 438, count = 233673
 ; BFI:  - BB17[if.end9.3]: float = 66.446, int = 531, count = 283289

diff --git a/llvm/test/CodeGen/X86/fsafdo_test4.ll b/llvm/test/CodeGen/X86/fsafdo_test4.ll
index b5a3a591e9fd..6a22ea982241 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test4.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test4.ll
@@ -1,10 +1,11 @@
-; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s
+; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s
 ;
 ; Check that fs-afdo discriminators are NOT generated, as debugInfoForProfiling is false (not set).
 ; CHECK: .loc    1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3
 ; CHECK: .loc    1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
-; CHECK-NOT: .loc    1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5
-; CHECK-NOT: .loc    1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3
+; CHECK-NOT: .loc    1 9 5 is_stmt 0 discriminator
+; CHECK-NOT: .loc    1 7 3 is_stmt 1 discriminator
 ; Check that variable __llvm_fs_discriminator__ is NOT generated.
 ; CHECK-NOT: __llvm_fs_discriminator__:
 


        


More information about the llvm-commits mailing list