[clang] febf5c9 - [Sema] Change order of displayed overloads in diagnostics

Mon Oct 23 04:06:38 PDT 2023

Author: Ilya Biryukov
Date: 2023-10-23T12:59:59+02:00
New Revision: febf5c97bba7910e796041c9518fce01f31ae826

URL: https://github.com/llvm/llvm-project/commit/febf5c97bba7910e796041c9518fce01f31ae826
DIFF: https://github.com/llvm/llvm-project/commit/febf5c97bba7910e796041c9518fce01f31ae826.diff

LOG: [Sema] Change order of displayed overloads in diagnostics

Make it a strict weak order.

Fixes #64121.

Current implementation uses the definition of ordering from the C++ Standard.
The definition provides only a partial order and cannot be used in sorting
algorithms.

The debug builds of libc++ are capable of detecting that problem
and this failure was found when building Clang with libc++ and
those extra checks enabled, see #64121.

The new ordering is a strict weak order and still
pushes most interesting functions to the start of the list.
In some cases, it leads to better results, e.g.

```
struct Foo {
  operator int();
  operator const char*();
};

void test() { Foo() - Foo(); }
```

Now produces a list with two most relevant builtin operators at the top,
i.e. `operator-(int, int)` and `operator-(const char*, const char*)`.
Previously `operator-(const char*, const char*)` was the first element,
but `operator-(int, int)` was only the 13th element in the output.
This is a consequence of `stable_sort` now being able to compare those
two candidates, which are indistinguishable in the semantic partial order
despite being two local minimums in their respective comparable
subsets.

However, new implementation does not take into account some aspects of
C++ semantics, e.g. which function template is more specialized. This
can also lead to worse ordering sometimes.

Reviewed By: #clang-language-wg, aaron.ballman

Differential Revision: https://reviews.llvm.org/D159351

Added: 
    clang/test/SemaCXX/overloaded-operators-display-order-crash.cpp

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/Sema/SemaOverload.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d697259f6f5eb9f..3c013835d0b35e9 100644

--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -356,6 +356,30 @@ Improvements to Clang's diagnostics
   nonportable construct that has 
diff erent semantics from a constant-sized
   array. (`#62836 <https://github.com/llvm/llvm-project/issues/62836>`_)
 
+- Clang changed the order in which it displays candidate functions on overloading failures.
+  Previously, Clang used definition of ordering from the C++ Standard. The order defined in
+  the Standard is partial and is not suited for sorting. Instead, Clang now uses a strict
+  order that still attempts to push more relevant functions to the top by comparing their
+  corresponding conversions. In some cases, this results in better order. E.g., for the
+  following code
+
+  .. code-block:: cpp
+      struct Foo {
+        operator int();
+        operator const char*();
+      };
+
+      void test() { Foo() - Foo(); }
+
+  Clang now produces a list with two most relevant builtin operators at the top,
+  i.e. ``operator-(int, int)`` and ``operator-(const char*, const char*)``.
+  Previously ``operator-(const char*, const char*)`` was the first element,
+  but ``operator-(int, int)`` was only the 13th element in the output.
+  However, new implementation does not take into account some aspects of
+  C++ semantics, e.g. which function template is more specialized. This
+  can sometimes lead to worse ordering.
+
+
 Bug Fixes in This Version
 -------------------------
 - Fixed an issue where a class template specialization whose declaration is

diff  --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index d57a7ad8f46859a..d9d7ce59b5fcb35 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -38,8 +38,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Casting.h"
 #include <algorithm>
+#include <cstddef>
 #include <cstdlib>
 #include <optional>
 
@@ -12017,6 +12019,7 @@ static unsigned RankDeductionFailure(const DeductionFailureInfo &DFI) {
 }
 
 namespace {
+
 struct CompareOverloadCandidatesForDisplay {
   Sema &S;
   SourceLocation Loc;
@@ -12054,13 +12057,9 @@ struct CompareOverloadCandidatesForDisplay {
     if (L->Viable) {
       if (!R->Viable) return true;
 
-      // TODO: introduce a tri-valued comparison for overload
-      // candidates.  Would be more worthwhile if we had a sort
-      // that could exploit it.
-      if (isBetterOverloadCandidate(S, *L, *R, SourceLocation(), CSK))
-        return true;
-      if (isBetterOverloadCandidate(S, *R, *L, SourceLocation(), CSK))
-        return false;
+      if (int Ord = CompareConversions(*L, *R))
+        return Ord < 0;
+      // Use other tie breakers.
     } else if (R->Viable)
       return false;
 
@@ -12112,30 +12111,8 @@ struct CompareOverloadCandidatesForDisplay {
         }
 
         // If there's any ordering between the defined conversions...
-        // FIXME: this might not be transitive.
-        assert(L->Conversions.size() == R->Conversions.size());
-
-        int leftBetter = 0;
-        unsigned I = (L->IgnoreObjectArgument || R->IgnoreObjectArgument);
-        for (unsigned E = L->Conversions.size(); I != E; ++I) {
-          switch (CompareImplicitConversionSequences(S, Loc,
-                                                     L->Conversions[I],
-                                                     R->Conversions[I])) {
-          case ImplicitConversionSequence::Better:
-            leftBetter++;
-            break;
-
-          case ImplicitConversionSequence::Worse:
-            leftBetter--;
-            break;
-
-          case ImplicitConversionSequence::Indistinguishable:
-            break;
-          }
-        }
-        if (leftBetter > 0) return true;
-        if (leftBetter < 0) return false;
-
+        if (int Ord = CompareConversions(*L, *R))
+          return Ord < 0;
       } else if (RFailureKind == ovl_fail_bad_conversion)
         return false;
 
@@ -12157,10 +12134,66 @@ struct CompareOverloadCandidatesForDisplay {
     SourceLocation RLoc = GetLocationForCandidate(R);
 
     // Put candidates without locations (e.g. builtins) at the end.
-    if (LLoc.isInvalid()) return false;
-    if (RLoc.isInvalid()) return true;
+    if (LLoc.isValid() && RLoc.isValid())
+      return S.SourceMgr.isBeforeInTranslationUnit(LLoc, RLoc);
+    if (LLoc.isValid() && !RLoc.isValid())
+      return true;
+    if (RLoc.isValid() && !LLoc.isValid())
+      return false;
+    assert(!LLoc.isValid() && !RLoc.isValid());
+    // For builtins and other functions without locations, fallback to the order
+    // in which they were added into the candidate set.
+    return L < R;
+  }
 
-    return S.SourceMgr.isBeforeInTranslationUnit(LLoc, RLoc);
+private:
+  struct ConversionSignals {
+    unsigned KindRank = 0;
+    ImplicitConversionRank Rank = ICR_Exact_Match;
+
+    static ConversionSignals ForSequence(ImplicitConversionSequence &Seq) {
+      ConversionSignals Sig;
+      Sig.KindRank = Seq.getKindRank();
+      if (Seq.isStandard())
+        Sig.Rank = Seq.Standard.getRank();
+      else if (Seq.isUserDefined())
+        Sig.Rank = Seq.UserDefined.After.getRank();
+      // We intend StaticObjectArgumentConversion to compare the same as
+      // StandardConversion with ICR_ExactMatch rank.
+      return Sig;
+    }
+
+    static ConversionSignals ForObjectArgument() {
+      // We intend StaticObjectArgumentConversion to compare the same as
+      // StandardConversion with ICR_ExactMatch rank. Default give us that.
+      return {};
+    }
+  };
+
+  // Returns -1 if conversions in L are considered better.
+  //          0 if they are considered indistinguishable.
+  //          1 if conversions in R are better.
+  int CompareConversions(const OverloadCandidate &L,
+                         const OverloadCandidate &R) {
+    // We cannot use `isBetterOverloadCandidate` because it is defined
+    // according to the C++ standard and provides a partial order, but we need
+    // a total order as this function is used in sort.
+    assert(L.Conversions.size() == R.Conversions.size());
+    for (unsigned I = 0, N = L.Conversions.size(); I != N; ++I) {
+      auto LS = L.IgnoreObjectArgument && I == 0
+                    ? ConversionSignals::ForObjectArgument()
+                    : ConversionSignals::ForSequence(L.Conversions[I]);
+      auto RS = R.IgnoreObjectArgument
+                    ? ConversionSignals::ForObjectArgument()
+                    : ConversionSignals::ForSequence(R.Conversions[I]);
+      if (std::tie(LS.KindRank, LS.Rank) != std::tie(RS.KindRank, RS.Rank))
+        return std::tie(LS.KindRank, LS.Rank) < std::tie(RS.KindRank, RS.Rank)
+                   ? -1
+                   : 1;
+    }
+    // FIXME: find a way to compare templates for being more or less
+    // specialized that provides a strict weak ordering.
+    return 0;
   }
 };
 }

diff  --git a/clang/test/SemaCXX/overloaded-operators-display-order-crash.cpp b/clang/test/SemaCXX/overloaded-operators-display-order-crash.cpp
new file mode 100644
index 000000000000000..5abd048f1d136e5
--- /dev/null
+++ b/clang/test/SemaCXX/overloaded-operators-display-order-crash.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+namespace ambig {
+  struct Foo {
+    operator int();
+    operator const char *();
+  };
+
+
+  void func(const char*, long);
+  void func(const char*, const char*);
+  void func(int, int);
+
+  bool doit(Foo x) {
+    func(x, x); // expected-error {{call to 'func' is ambiguous}}
+                // expected-note@* 3{{candidate}}
+    // Check that two functions with best conversions are at the top.
+    // CHECK: error: call to 'func' is ambiguous
+    // CHECK-NEXT: func(x, x)
+    // CHECK-NEXT: ^~~~
+    // CHECK-NEXT: note: candidate function
+    // CHECK-NEXT: void func(const char*, const char*)
+    // CHECK-NEXT: ^
+    // CHECK-NEXT: note: candidate function
+    // CHECK-NEXT: void func(int, int)
+  }
+}
+
+namespace bad_conversion {
+  struct Foo {
+    operator int();
+    operator const char *();
+  };
+
+
+  void func(double*, const char*, long);
+  void func(double*, const char*, const char*);
+  void func(double*, int, int);
+
+  bool doit(Foo x) {
+    func((int*)0, x, x); // expected-error {{no matching function for call to 'func'}}
+                         // expected-note@* 3{{candidate}}
+    // Check that two functions with best conversions are at the top.
+    // CHECK: error: no matching function for call to 'func'
+    // CHECK-NEXT: func((int*)0, x, x)
+    // CHECK-NEXT: ^~~~
+    // CHECK-NEXT: note: candidate function
+    // CHECK-NEXT: void func(double*, const char*, const char*)
+    // CHECK-NEXT: ^
+    // CHECK-NEXT: note: candidate function
+    // CHECK-NEXT: void func(double*, int, int)
+  }
+}