[Lldb-commits] [lldb] r374094 - exception handling in PythonDataObjects.

Lawrence D'Anna via lldb-commits lldb-commits at lists.llvm.org
Tue Oct 8 10:56:18 PDT 2019


Author: lawrence_danna
Date: Tue Oct  8 10:56:18 2019
New Revision: 374094

URL: http://llvm.org/viewvc/llvm-project?rev=374094&view=rev
Log:
exception handling in PythonDataObjects.

Summary:
Python APIs nearly all can return an exception.   They do this
by returning NULL, or -1, or some such value and setting
the exception state with PyErr_Set*().   Exceptions must be
handled before further python API functions are called.   Failure
to do so will result in asserts on debug builds of python.
It will also sometimes, but not usually result in crashes of
release builds.

Nearly everything in PythonDataObjects.h needs to be updated
to account for this.   This patch doesn't fix everything,
but it does introduce some new methods using Expected<>
return types that are safe to use.

split off from https://reviews.llvm.org/D68188

Reviewers: JDevlieghere, jasonmolenda, labath, zturner

Reviewed By: labath

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D68547

Modified:
    lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
    lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h

Modified: lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp?rev=374094&r1=374093&r2=374094&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp (original)
+++ lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp Tue Oct  8 10:56:18 2019
@@ -18,6 +18,7 @@
 #include "lldb/Host/File.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Interpreter/ScriptInterpreter.h"
+#include "lldb/Utility/Log.h"
 #include "lldb/Utility/Stream.h"
 
 #include "llvm/ADT/StringSwitch.h"
@@ -28,6 +29,22 @@
 
 using namespace lldb_private;
 using namespace lldb;
+using namespace lldb_private::python;
+using llvm::Error;
+using llvm::Expected;
+
+template <> Expected<bool> python::As<bool>(Expected<PythonObject> &&obj) {
+  if (!obj)
+    return obj.takeError();
+  return obj.get().IsTrue();
+}
+
+template <>
+Expected<long long> python::As<long long>(Expected<PythonObject> &&obj) {
+  if (!obj)
+    return obj.takeError();
+  return obj.get().AsLongLong();
+}
 
 void StructuredPythonObject::Serialize(llvm::json::OStream &s) const {
   s.value(llvm::formatv("Python Obj: {0:X}", GetValue()).str());
@@ -167,12 +184,6 @@ PythonObject PythonObject::GetAttributeV
                       PyObject_GetAttr(m_py_obj, py_attr.get()));
 }
 
-bool PythonObject::IsNone() const { return m_py_obj == Py_None; }
-
-bool PythonObject::IsValid() const { return m_py_obj != nullptr; }
-
-bool PythonObject::IsAllocated() const { return IsValid() && !IsNone(); }
-
 StructuredData::ObjectSP PythonObject::CreateStructuredObject() const {
   switch (GetObjectType()) {
   case PyObjectType::Dictionary:
@@ -334,6 +345,17 @@ StructuredData::StringSP PythonByteArray
 
 // PythonString
 
+Expected<PythonString> PythonString::FromUTF8(llvm::StringRef string) {
+#if PY_MAJOR_VERSION >= 3
+  PyObject *str = PyUnicode_FromStringAndSize(string.data(), string.size());
+#else
+  PyObject *str = PyString_FromStringAndSize(string.data(), string.size());
+#endif
+  if (!str)
+    return llvm::make_error<PythonException>();
+  return Take<PythonString>(str);
+}
+
 PythonString::PythonString(PyRefType type, PyObject *py_obj) : PythonObject() {
   Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string
 }
@@ -342,10 +364,6 @@ PythonString::PythonString(llvm::StringR
   SetString(string);
 }
 
-PythonString::PythonString(const char *string) : PythonObject() {
-  SetString(llvm::StringRef(string));
-}
-
 PythonString::PythonString() : PythonObject() {}
 
 PythonString::~PythonString() {}
@@ -376,8 +394,12 @@ void PythonString::Reset(PyRefType type,
   // In Python 2, Don't store PyUnicode objects directly, because we need
   // access to their underlying character buffers which Python 2 doesn't
   // provide.
-  if (PyUnicode_Check(py_obj))
-    result.Reset(PyRefType::Owned, PyUnicode_AsUTF8String(result.get()));
+  if (PyUnicode_Check(py_obj)) {
+    PyObject *s = PyUnicode_AsUTF8String(result.get());
+    if (s == NULL)
+      PyErr_Clear();
+    result.Reset(PyRefType::Owned, s);
+  }
 #endif
   // Calling PythonObject::Reset(const PythonObject&) will lead to stack
   // overflow since it calls back into the virtual implementation.
@@ -385,8 +407,17 @@ void PythonString::Reset(PyRefType type,
 }
 
 llvm::StringRef PythonString::GetString() const {
+  auto s = AsUTF8();
+  if (!s) {
+    llvm::consumeError(s.takeError());
+    return llvm::StringRef("");
+  }
+  return s.get();
+}
+
+Expected<llvm::StringRef> PythonString::AsUTF8() const {
   if (!IsValid())
-    return llvm::StringRef();
+    return nullDeref();
 
   Py_ssize_t size;
   const char *data;
@@ -394,10 +425,16 @@ llvm::StringRef PythonString::GetString(
 #if PY_MAJOR_VERSION >= 3
   data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
 #else
-  char *c;
-  PyString_AsStringAndSize(m_py_obj, &c, &size);
+  char *c = NULL;
+  int r = PyString_AsStringAndSize(m_py_obj, &c, &size);
+  if (r < 0)
+    c = NULL;
   data = c;
 #endif
+
+  if (!data)
+    return exception();
+
   return llvm::StringRef(data, size);
 }
 
@@ -413,13 +450,13 @@ size_t PythonString::GetSize() const {
 }
 
 void PythonString::SetString(llvm::StringRef string) {
-#if PY_MAJOR_VERSION >= 3
-  PyObject *unicode = PyUnicode_FromStringAndSize(string.data(), string.size());
-  PythonObject::Reset(PyRefType::Owned, unicode);
-#else
-  PyObject *str = PyString_FromStringAndSize(string.data(), string.size());
-  PythonObject::Reset(PyRefType::Owned, str);
-#endif
+  auto s = FromUTF8(string);
+  if (!s) {
+    llvm::consumeError(s.takeError());
+    Reset();
+  } else {
+    PythonObject::Reset(std::move(s.get()));
+  }
 }
 
 StructuredData::StringSP PythonString::CreateStructuredString() const {
@@ -826,9 +863,23 @@ PythonModule PythonModule::AddModule(llv
   return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str()));
 }
 
-PythonModule PythonModule::ImportModule(llvm::StringRef module) {
-  std::string str = module.str();
-  return PythonModule(PyRefType::Owned, PyImport_ImportModule(str.c_str()));
+Expected<PythonModule> PythonModule::Import(const char *name) {
+  PyObject *mod = PyImport_ImportModule(name);
+  if (!mod)
+    return exception();
+  return Take<PythonModule>(mod);
+}
+
+Expected<PythonObject> PythonModule::Get(const char *name) {
+  if (!IsValid())
+    return nullDeref();
+  PyObject *dict = PyModule_GetDict(m_py_obj);
+  if (!dict)
+    return exception();
+  PyObject *item = PyDict_GetItemString(dict, name);
+  if (!item)
+    return exception();
+  return Retain<PythonObject>(item);
 }
 
 bool PythonModule::Check(PyObject *py_obj) {
@@ -1045,4 +1096,58 @@ FileUP PythonFile::GetUnderlyingFile() c
   return file;
 }
 
+const char *PythonException::toCString() const {
+  if (!m_repr_bytes)
+    return "unknown exception";
+  return PyBytes_AS_STRING(m_repr_bytes);
+}
+
+PythonException::PythonException(const char *caller) {
+  assert(PyErr_Occurred());
+  m_exception_type = m_exception = m_traceback = m_repr_bytes = NULL;
+  PyErr_Fetch(&m_exception_type, &m_exception, &m_traceback);
+  PyErr_NormalizeException(&m_exception_type, &m_exception, &m_traceback);
+  PyErr_Clear();
+  if (m_exception) {
+    PyObject *repr = PyObject_Repr(m_exception);
+    if (repr) {
+      m_repr_bytes = PyUnicode_AsEncodedString(repr, "utf-8", nullptr);
+      if (!m_repr_bytes) {
+        PyErr_Clear();
+      }
+      Py_XDECREF(repr);
+    } else {
+      PyErr_Clear();
+    }
+  }
+  Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SCRIPT);
+  if (caller)
+    LLDB_LOGF(log, "%s failed with exception: %s", caller, toCString());
+  else
+    LLDB_LOGF(log, "python exception: %s", toCString());
+}
+void PythonException::Restore() {
+  if (m_exception_type && m_exception) {
+    PyErr_Restore(m_exception_type, m_exception, m_traceback);
+  } else {
+    PyErr_SetString(PyExc_Exception, toCString());
+  }
+  m_exception_type = m_exception = m_traceback = NULL;
+}
+
+PythonException::~PythonException() {
+  Py_XDECREF(m_exception_type);
+  Py_XDECREF(m_exception);
+  Py_XDECREF(m_traceback);
+  Py_XDECREF(m_repr_bytes);
+}
+
+void PythonException::log(llvm::raw_ostream &OS) const { OS << toCString(); }
+
+std::error_code PythonException::convertToErrorCode() const {
+  return llvm::inconvertibleErrorCode();
+}
+
+char PythonException::ID = 0;
+
 #endif

Modified: lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h?rev=374094&r1=374093&r2=374094&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h (original)
+++ lldb/trunk/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h Tue Oct  8 10:56:18 2019
@@ -6,6 +6,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+//
+// !! FIXME FIXME FIXME !!
+//
+// Python APIs nearly all can return an exception.   They do this
+// by returning NULL, or -1, or some such value and setting
+// the exception state with PyErr_Set*().   Exceptions must be
+// handled before further python API functions are called.   Failure
+// to do so will result in asserts on debug builds of python.
+// It will also sometimes, but not usually result in crashes of
+// release builds.
+//
+// Nearly all the code in this header does not handle python exceptions
+// correctly.  It should all be converted to return Expected<> or
+// Error types to capture the exception.
+//
+// Everything in this file except functions that return Error or
+// Expected<> is considered deprecated and should not be
+// used in new code.  If you need to use it, fix it first.
+//
+
 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
 
@@ -21,11 +41,13 @@
 
 namespace lldb_private {
 
+class PythonObject;
 class PythonBytes;
 class PythonString;
 class PythonList;
 class PythonDictionary;
 class PythonInteger;
+class PythonException;
 
 class StructuredPythonObject : public StructuredData::Generic {
 public:
@@ -72,8 +94,67 @@ enum class PyRefType {
             // not call Py_INCREF.
 };
 
+namespace python {
+
+// Take a reference that you already own, and turn it into
+// a PythonObject.
+//
+// Most python API methods will return a +1 reference
+// if they succeed or NULL if and only if
+// they set an exception.   Use this to collect such return
+// values, after checking for NULL.
+//
+// If T is not just PythonObject, then obj must be already be
+// checked to be of the correct type.
+template <typename T> T Take(PyObject *obj) {
+  assert(obj);
+  assert(!PyErr_Occurred());
+  T thing(PyRefType::Owned, obj);
+  assert(thing.IsValid());
+  return std::move(thing);
+}
+
+// Retain a reference you have borrowed, and turn it into
+// a PythonObject.
+//
+// A minority of python APIs return a borrowed reference
+// instead of a +1.   They will also return NULL if and only
+// if they set an exception.   Use this to collect such return
+// values, after checking for NULL.
+//
+// If T is not just PythonObject, then obj must be already be
+// checked to be of the correct type.
+template <typename T> T Retain(PyObject *obj) {
+  assert(obj);
+  assert(!PyErr_Occurred());
+  T thing(PyRefType::Borrowed, obj);
+  assert(thing.IsValid());
+  return std::move(thing);
+}
+
+} // namespace python
+
 enum class PyInitialValue { Invalid, Empty };
 
+template <typename T, typename Enable = void> struct PythonFormat;
+
+template <> struct PythonFormat<unsigned long long> {
+  static constexpr char format = 'K';
+  static auto get(unsigned long long value) { return value; }
+};
+
+template <> struct PythonFormat<long long> {
+  static constexpr char format = 'L';
+  static auto get(long long value) { return value; }
+};
+
+template <typename T>
+struct PythonFormat<
+    T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {
+  static constexpr char format = 'O';
+  static auto get(const T &value) { return value.get(); }
+};
+
 class PythonObject {
 public:
   PythonObject() : m_py_obj(nullptr) {}
@@ -84,14 +165,19 @@ public:
 
   PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); }
 
+  PythonObject(PythonObject &&rhs) {
+    m_py_obj = rhs.m_py_obj;
+    rhs.m_py_obj = nullptr;
+  }
+
   virtual ~PythonObject() { Reset(); }
 
   void Reset() {
     // Avoid calling the virtual method since it's not necessary
     // to actually validate the type of the PyObject if we're
     // just setting to null.
-    if (Py_IsInitialized())
-      Py_XDECREF(m_py_obj);
+    if (m_py_obj && Py_IsInitialized())
+      Py_DECREF(m_py_obj);
     m_py_obj = nullptr;
   }
 
@@ -110,6 +196,8 @@ public:
   // PyRefType doesn't make sense, and the copy constructor should be used.
   void Reset(PyRefType type, const PythonObject &ref) = delete;
 
+  // FIXME We shouldn't have virtual anything.  PythonObject should be a
+  // strictly pass-by-value type.
   virtual void Reset(PyRefType type, PyObject *py_obj) {
     if (py_obj == m_py_obj)
       return;
@@ -123,7 +211,7 @@ public:
     // an owned reference by incrementing it.  If it is an owned
     // reference (for example the caller allocated it with PyDict_New()
     // then we must *not* increment it.
-    if (Py_IsInitialized() && type == PyRefType::Borrowed)
+    if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)
       Py_XINCREF(m_py_obj);
   }
 
@@ -149,6 +237,17 @@ public:
     return *this;
   }
 
+  void Reset(PythonObject &&other) {
+    Reset();
+    m_py_obj = other.m_py_obj;
+    other.m_py_obj = nullptr;
+  }
+
+  PythonObject &operator=(PythonObject &&other) {
+    Reset(std::move(other));
+    return *this;
+  }
+
   PyObjectType GetObjectType() const;
 
   PythonString Repr() const;
@@ -174,11 +273,13 @@ public:
 
   PythonObject GetAttributeValue(llvm::StringRef attribute) const;
 
-  bool IsValid() const;
+  bool IsNone() const { return m_py_obj == Py_None; }
 
-  bool IsAllocated() const;
+  bool IsValid() const { return m_py_obj != nullptr; }
 
-  bool IsNone() const;
+  bool IsAllocated() const { return IsValid() && !IsNone(); }
+
+  explicit operator bool() const { return IsValid() && !IsNone(); }
 
   template <typename T> T AsType() const {
     if (!T::Check(m_py_obj))
@@ -189,9 +290,92 @@ public:
   StructuredData::ObjectSP CreateStructuredObject() const;
 
 protected:
+  static llvm::Error nullDeref() {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "A NULL PyObject* was dereferenced");
+  }
+  static llvm::Error exception(const char *s = nullptr) {
+    return llvm::make_error<PythonException>(s);
+  }
+
+public:
+  template <typename... T>
+  llvm::Expected<PythonObject> CallMethod(const char *name,
+                                          const T &... t) const {
+    const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
+#if PY_MAJOR_VERSION < 3
+    PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast<char *>(name),
+                                        const_cast<char *>(format),
+                                        PythonFormat<T>::get(t)...);
+#else
+    PyObject *obj =
+        PyObject_CallMethod(m_py_obj, name, format, PythonFormat<T>::get(t)...);
+#endif
+    if (!obj)
+      return exception();
+    return python::Take<PythonObject>(obj);
+  }
+
+  llvm::Expected<PythonObject> GetAttribute(const char *name) const {
+    if (!m_py_obj)
+      return nullDeref();
+    PyObject *obj = PyObject_GetAttrString(m_py_obj, name);
+    if (!obj)
+      return exception();
+    return python::Take<PythonObject>(obj);
+  }
+
+  llvm::Expected<bool> IsTrue() {
+    if (!m_py_obj)
+      return nullDeref();
+    int r = PyObject_IsTrue(m_py_obj);
+    if (r < 0)
+      return exception();
+    return !!r;
+  }
+
+  llvm::Expected<long long> AsLongLong() {
+    if (!m_py_obj)
+      return nullDeref();
+    assert(!PyErr_Occurred());
+    long long r = PyLong_AsLongLong(m_py_obj);
+    if (PyErr_Occurred())
+      return exception();
+    return r;
+  }
+
+  llvm::Expected<bool> IsInstance(const PythonObject &cls) {
+    if (!m_py_obj || !cls.IsValid())
+      return nullDeref();
+    int r = PyObject_IsInstance(m_py_obj, cls.get());
+    if (r < 0)
+      return exception();
+    return !!r;
+  }
+
+protected:
   PyObject *m_py_obj;
 };
 
+namespace python {
+
+// This is why C++ needs monads.
+template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
+  if (!obj)
+    return obj.takeError();
+  if (!T::Check(obj.get().get()))
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "type error");
+  return T(PyRefType::Borrowed, std::move(obj.get().get()));
+}
+
+template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);
+
+template <>
+llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);
+
+} // namespace python
+
 class PythonBytes : public PythonObject {
 public:
   PythonBytes();
@@ -245,9 +429,10 @@ public:
 
 class PythonString : public PythonObject {
 public:
+  static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);
+
   PythonString();
-  explicit PythonString(llvm::StringRef string);
-  explicit PythonString(const char *string);
+  explicit PythonString(llvm::StringRef string); // safe, null on error
   PythonString(PyRefType type, PyObject *o);
 
   ~PythonString() override;
@@ -259,11 +444,13 @@ public:
 
   void Reset(PyRefType type, PyObject *py_obj) override;
 
-  llvm::StringRef GetString() const;
+  llvm::StringRef GetString() const; // safe, empty string on error
+
+  llvm::Expected<llvm::StringRef> AsUTF8() const;
 
   size_t GetSize() const;
 
-  void SetString(llvm::StringRef string);
+  void SetString(llvm::StringRef string); // safe, null on error
 
   StructuredData::StringSP CreateStructuredString() const;
 };
@@ -406,7 +593,20 @@ public:
 
   static PythonModule AddModule(llvm::StringRef module);
 
-  static PythonModule ImportModule(llvm::StringRef module);
+  // safe, returns invalid on error;
+  static PythonModule ImportModule(llvm::StringRef name) {
+    std::string s = name;
+    auto mod = Import(s.c_str());
+    if (!mod) {
+      llvm::consumeError(mod.takeError());
+      return PythonModule();
+    }
+    return std::move(mod.get());
+  }
+
+  static llvm::Expected<PythonModule> Import(const char *name);
+
+  llvm::Expected<PythonObject> Get(const char *name);
 
   // Bring in the no-argument base class version
   using PythonObject::Reset;
@@ -471,8 +671,59 @@ public:
   void Reset(File &file, const char *mode);
 
   lldb::FileUP GetUnderlyingFile() const;
+
+  llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
+  llvm::Expected<lldb::FileSP>
+  ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
 };
 
+class PythonException : public llvm::ErrorInfo<PythonException> {
+private:
+  PyObject *m_exception_type, *m_exception, *m_traceback;
+  PyObject *m_repr_bytes;
+
+public:
+  static char ID;
+  const char *toCString() const;
+  PythonException(const char *caller = nullptr);
+  void Restore();
+  ~PythonException();
+  void log(llvm::raw_ostream &OS) const override;
+  std::error_code convertToErrorCode() const override;
+};
+
+// This extracts the underlying T out of an Expected<T> and returns it.
+// If the Expected is an Error instead of a T, that error will be converted
+// into a python exception, and this will return a default-constructed T.
+//
+// This is appropriate for use right at the boundary of python calling into
+// C++, such as in a SWIG typemap.   In such a context you should simply
+// check if the returned T is valid, and if it is, return a NULL back
+// to python.   This will result in the Error being raised as an exception
+// from python code's point of view.
+//
+// For example:
+// ```
+// Expected<Foo *> efoop = some_cpp_function();
+// Foo *foop = unwrapOrSetPythonException(efoop);
+// if (!foop)
+//    return NULL;
+// do_something(*foop);
+//
+// If the Error returned was itself created because a python exception was
+// raised when C++ code called into python, then the original exception
+// will be restored.   Otherwise a simple string exception will be raised.
+template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {
+  if (expected)
+    return expected.get();
+  llvm::handleAllErrors(
+      expected.takeError(), [](PythonException &E) { E.Restore(); },
+      [](const llvm::ErrorInfoBase &E) {
+        PyErr_SetString(PyExc_Exception, E.message().c_str());
+      });
+  return T();
+}
+
 } // namespace lldb_private
 
 #endif




More information about the lldb-commits mailing list