[libcxx-commits] [libcxxabi] f0fcd42 - [libc++abi] Fix possible infinite loop in itanium demangler

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Tue Aug 17 15:16:29 PDT 2021


Author: Mikhail Borisov
Date: 2021-08-17T18:13:26-04:00
New Revision: f0fcd42495432670664a661e75e7cae7e904dd3e

URL: https://github.com/llvm/llvm-project/commit/f0fcd42495432670664a661e75e7cae7e904dd3e
DIFF: https://github.com/llvm/llvm-project/commit/f0fcd42495432670664a661e75e7cae7e904dd3e.diff

LOG: [libc++abi] Fix possible infinite loop in itanium demangler

A libfuzzer run has discovered some inputs for which the demangler does
not terminate. When minimized, it looks like this: _Zcv1BIRT_EIS1_E

Deciphered:

_Z
cv    - conversion operator

      * result type
 1B   - "B"
 I    - template args begin
  R   - reference type              <.
   T_ - forward template reference   |  *
 E    - template args end            |  |
                                     |  |
      * parameter type               |  |
 I    - template args begin          |  |
  S1_ - substitution #1              * <'
 E    - template args end

The reason is: template-parameter refs in conversion operator result type
create forward-references, while substitutions are instantly resolved via
back-references. Together these can create a reference loop. It causes an
infinite loop in ReferenceType::collapse().

I see three possible ways to avoid these loops:

1. check if resolving a forward reference creates a loop and reject the
   invalid input (hard to traverse AST at this point)
2. check if a substitution contains a malicious forward reference and
   reject the invalid input (hard to traverse AST at this point;
   substitutions are quite common: may affect performance; hard to
   clearly detect loops at this point)
3. detect loops in ReferenceType::collapse() (cannot reject the input)

This patch implements (3) as seemingly the least-impact change. As a
side effect, such invalid input strings are not rejected and produce
garbage, however there are already similar guards in
`if (Printing) return;` checks.

Fixes https://llvm.org/PR51407

Differential Revision: https://reviews.llvm.org/D107712

Added: 
    

Modified: 
    libcxxabi/src/demangle/ItaniumDemangle.h
    libcxxabi/test/test_demangle.pass.cpp
    llvm/include/llvm/Demangle/ItaniumDemangle.h

Removed: 
    


################################################################################
diff  --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 94c26ea210523..f73959868d706 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -651,8 +651,15 @@ class ReferenceType : public Node {
   // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
   // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
   // other combination collapses to a lvalue ref.
+  //
+  // A combination of a TemplateForwardReference and a back-ref Substitution
+  // from an ill-formed string may have created a cycle; use cycle detection to
+  // avoid looping forever.
   std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
     auto SoFar = std::make_pair(RK, Pointee);
+    // Track the chain of nodes for the Floyd's 'tortoise and hare'
+    // cycle-detection algorithm, since getSyntaxNode(S) is impure
+    PODSmallVector<const Node *, 8> Prev;
     for (;;) {
       const Node *SN = SoFar.second->getSyntaxNode(S);
       if (SN->getKind() != KReferenceType)
@@ -660,6 +667,14 @@ class ReferenceType : public Node {
       auto *RT = static_cast<const ReferenceType *>(SN);
       SoFar.second = RT->Pointee;
       SoFar.first = std::min(SoFar.first, RT->RK);
+
+      // The middle of Prev is the 'slow' pointer moving at half speed
+      Prev.push_back(SoFar.second);
+      if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) {
+        // Cycle detected
+        SoFar.second = nullptr;
+        break;
+      }
     }
     return SoFar;
   }
@@ -680,6 +695,8 @@ class ReferenceType : public Node {
       return;
     SwapAndRestore<bool> SavePrinting(Printing, true);
     std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+    if (!Collapsed.second)
+      return;
     Collapsed.second->printLeft(s);
     if (Collapsed.second->hasArray(s))
       s += " ";
@@ -693,6 +710,8 @@ class ReferenceType : public Node {
       return;
     SwapAndRestore<bool> SavePrinting(Printing, true);
     std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+    if (!Collapsed.second)
+      return;
     if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
       s += ")";
     Collapsed.second->printRight(s);

diff  --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index 1780e684cbb57..0744172a6cbb4 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -9,6 +9,11 @@
 // The demangler does not pass all these tests with the system dylibs on macOS.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
 
+// https://llvm.org/PR51407 was not fixed in some previously-released
+// demanglers, which causes them to run into the infinite loop.
+// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
+// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx11.0
+
 #include "support/timer.h"
 #include <cassert>
 #include <cstdio>
@@ -29844,6 +29849,9 @@ const char* cases[][2] =
 
     {"_ZN3xxx3yyyIvNS_1AILm0EEEZNS_2bb2cc2ddILNS_1eE1EEEvRKNS_1fERKNS_1g1hINS_1iEEERKNS_1jEfRKNS_1kEiPhEUlvE_JEEEvT1_DpT2_", "void xxx::yyy<void, xxx::A<0ul>, void xxx::bb::cc::dd<(xxx::e)1>(xxx::f const&, xxx::g::h<xxx::i> const&, xxx::j const&, float, xxx::k const&, int, unsigned char*)::'lambda'()>(void xxx::bb::cc::dd<(xxx::e)1>(xxx::f const&, xxx::g::h<xxx::i> const&, xxx::j const&, float, xxx::k const&, int, unsigned char*)::'lambda'())"},
 
+    // This should be invalid, but it is currently not recognized as such
+    // See https://llvm.org/PR51407
+    {"_Zcv1BIRT_EIS1_E", "operator B<><>"},
 };
 
 const unsigned N = sizeof(cases) / sizeof(cases[0]);

diff  --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 406100c2f0ddf..dd824a50359d1 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -651,8 +651,15 @@ class ReferenceType : public Node {
   // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
   // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
   // other combination collapses to a lvalue ref.
+  //
+  // A combination of a TemplateForwardReference and a back-ref Substitution
+  // from an ill-formed string may have created a cycle; use cycle detection to
+  // avoid looping forever.
   std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
     auto SoFar = std::make_pair(RK, Pointee);
+    // Track the chain of nodes for the Floyd's 'tortoise and hare'
+    // cycle-detection algorithm, since getSyntaxNode(S) is impure
+    PODSmallVector<const Node *, 8> Prev;
     for (;;) {
       const Node *SN = SoFar.second->getSyntaxNode(S);
       if (SN->getKind() != KReferenceType)
@@ -660,6 +667,14 @@ class ReferenceType : public Node {
       auto *RT = static_cast<const ReferenceType *>(SN);
       SoFar.second = RT->Pointee;
       SoFar.first = std::min(SoFar.first, RT->RK);
+
+      // The middle of `Prev` is the 'slow' pointer moving at half speed
+      Prev.push_back(SoFar.second);
+      if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) {
+        // Cycle detected
+        SoFar.second = nullptr;
+        break;
+      }
     }
     return SoFar;
   }
@@ -680,6 +695,8 @@ class ReferenceType : public Node {
       return;
     SwapAndRestore<bool> SavePrinting(Printing, true);
     std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+    if (!Collapsed.second)
+      return;
     Collapsed.second->printLeft(s);
     if (Collapsed.second->hasArray(s))
       s += " ";
@@ -693,6 +710,8 @@ class ReferenceType : public Node {
       return;
     SwapAndRestore<bool> SavePrinting(Printing, true);
     std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+    if (!Collapsed.second)
+      return;
     if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
       s += ")";
     Collapsed.second->printRight(s);


        


More information about the libcxx-commits mailing list