Skip to content

gh-102381: don't call watcher callback with dead object #102382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ _PyDict_NotifyEvent(PyDict_WatchEvent event,
PyObject *key,
PyObject *value)
{
assert(Py_REFCNT((PyObject*)mp) > 0);
int watcher_bits = mp->ma_version_tag & DICT_VERSION_MASK;
if (watcher_bits) {
_PyDict_SendEvent(watcher_bits, event, mp, key, value);
Expand Down
49 changes: 47 additions & 2 deletions Lib/test/test_capi/test_watchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,21 @@ def test_error(self):
self.watch(wid, d)
with catch_unraisable_exception() as cm:
d["foo"] = "bar"
self.assertIs(cm.unraisable.object, d)
self.assertIn(
"watcher callback for <dict at",
cm.unraisable.object
)
self.assertEqual(str(cm.unraisable.exc_value), "boom!")
self.assert_events([])

def test_dealloc_error(self):
d = {}
with self.watcher(kind=self.ERROR) as wid:
self.watch(wid, d)
with catch_unraisable_exception() as cm:
del d
self.assertEqual(str(cm.unraisable.exc_value), "boom!")

def test_two_watchers(self):
d1 = {}
d2 = {}
Expand Down Expand Up @@ -389,6 +400,22 @@ def test_code_object_events_dispatched(self):
del co4
self.assert_event_counts(0, 0, 0, 0)

def test_error(self):
with self.code_watcher(2):
with catch_unraisable_exception() as cm:
co = _testcapi.code_newempty("test_watchers", "dummy0", 0)

self.assertEqual(cm.unraisable.object, f"watcher callback for {co!r}")
self.assertEqual(str(cm.unraisable.exc_value), "boom!")

def test_dealloc_error(self):
co = _testcapi.code_newempty("test_watchers", "dummy0", 0)
with self.code_watcher(2):
with catch_unraisable_exception() as cm:
del co

self.assertEqual(str(cm.unraisable.exc_value), "boom!")

def test_clear_out_of_range_watcher_id(self):
with self.assertRaisesRegex(ValueError, r"Invalid code watcher ID -1"):
_testcapi.clear_code_watcher(-1)
Expand Down Expand Up @@ -479,7 +506,25 @@ def watcher(*args):
def myfunc():
pass

self.assertIs(cm.unraisable.object, myfunc)
self.assertEqual(
cm.unraisable.object,
f"watcher callback for {myfunc!r}"
)

def test_dealloc_watcher_raises_error(self):
class MyError(Exception):
pass

def watcher(*args):
raise MyError("testing 123")

def myfunc():
pass

with self.add_watcher(watcher):
with catch_unraisable_exception() as cm:
del myfunc

self.assertIsInstance(cm.unraisable.exc_value, MyError)

def test_clear_out_of_range_watcher_id(self):
Expand Down
11 changes: 11 additions & 0 deletions Modules/_testcapi/watchers.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,13 @@ noop_code_event_handler(PyCodeEvent event, PyCodeObject *co)
return 0;
}

static int
error_code_event_handler(PyCodeEvent event, PyCodeObject *co)
{
PyErr_SetString(PyExc_RuntimeError, "boom!");
return -1;
}

static PyObject *
add_code_watcher(PyObject *self, PyObject *which_watcher)
{
Expand All @@ -333,7 +340,11 @@ add_code_watcher(PyObject *self, PyObject *which_watcher)
num_code_object_created_events[1] = 0;
num_code_object_destroyed_events[1] = 0;
}
else if (which_l == 2) {
watcher_id = PyCode_AddWatcher(error_code_event_handler);
}
else {
PyErr_Format(PyExc_ValueError, "invalid watcher %d", which_l);
return NULL;
}
if (watcher_id < 0) {
Expand Down
18 changes: 17 additions & 1 deletion Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "clinic/codeobject.c.h"

static PyObject* code_repr(PyCodeObject *co);

static void
notify_code_watchers(PyCodeEvent event, PyCodeObject *co)
{
assert(Py_REFCNT(co) > 0);
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(interp->_initialized);
uint8_t bits = interp->active_code_watchers;
Expand All @@ -25,7 +28,13 @@ notify_code_watchers(PyCodeEvent event, PyCodeObject *co)
// callback must be non-null if the watcher bit is set
assert(cb != NULL);
if (cb(event, co) < 0) {
PyErr_WriteUnraisable((PyObject *) co);
// Don't risk resurrecting the object if an unraisablehook keeps
// a reference; pass a string as context.
PyObject *repr = code_repr(co);
PyObject *context = PyUnicode_FromFormat("watcher callback for %U", repr);
PyErr_WriteUnraisable(context);
Py_DECREF(context);
Py_DECREF(repr);
}
}
i++;
Expand Down Expand Up @@ -1667,7 +1676,14 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
static void
code_dealloc(PyCodeObject *co)
{
assert(Py_REFCNT(co) == 0);
Py_SET_REFCNT(co, 1);
notify_code_watchers(PY_CODE_EVENT_DESTROY, co);
if (Py_REFCNT(co) > 1) {
Py_DECREF(co);
return;
}
Py_SET_REFCNT(co, 0);

if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET();
Expand Down
17 changes: 14 additions & 3 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2308,7 +2308,14 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
static void
dict_dealloc(PyDictObject *mp)
{
assert(Py_REFCNT(mp) == 0);
Py_SET_REFCNT(mp, 1);
_PyDict_NotifyEvent(PyDict_EVENT_DEALLOCATED, mp, NULL, NULL);
if(Py_REFCNT(mp) > 1) {
Py_DECREF(mp);
return;
}
Py_SET_REFCNT(mp, 0);
PyDictValues *values = mp->ma_values;
PyDictKeysObject *keys = mp->ma_keys;
Py_ssize_t i, n;
Expand Down Expand Up @@ -5744,9 +5751,13 @@ _PyDict_SendEvent(int watcher_bits,
if (watcher_bits & 1) {
PyDict_WatchCallback cb = interp->dict_state.watchers[i];
if (cb && (cb(event, (PyObject*)mp, key, value) < 0)) {
// some dict modification paths (e.g. PyDict_Clear) can't raise, so we
// can't propagate exceptions from dict watchers.
PyErr_WriteUnraisable((PyObject *)mp);
// We don't want to resurrect the dict by potentially having an
// unraisablehook keep a reference to it, so we don't pass the
// dict as context, just an informative string message. Dict
// repr can call arbitrary code, so we invent a simpler version.
PyObject *context = PyUnicode_FromFormat("watcher callback for <dict at %p>", mp);
PyErr_WriteUnraisable(context);
Py_DECREF(context);
}
}
watcher_bits >>= 1;
Expand Down
18 changes: 17 additions & 1 deletion Objects/funcobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "structmember.h" // PyMemberDef

static PyObject* func_repr(PyFunctionObject *op);

static void
notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event,
PyFunctionObject *func, PyObject *new_value)
Expand All @@ -21,7 +23,13 @@ notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event,
// callback must be non-null if the watcher bit is set
assert(cb != NULL);
if (cb(event, func, new_value) < 0) {
PyErr_WriteUnraisable((PyObject *) func);
// Don't risk resurrecting the func if an unraisablehook keeps a
// reference; pass a string as context.
PyObject *repr = func_repr(func);
PyObject *context = PyUnicode_FromFormat("watcher callback for %U", repr);
PyErr_WriteUnraisable(context);
Py_DECREF(context);
Py_DECREF(repr);
}
}
i++;
Expand All @@ -33,6 +41,7 @@ static inline void
handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func,
PyObject *new_value)
{
assert(Py_REFCNT(func) > 0);
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(interp->_initialized);
if (interp->active_func_watchers) {
Expand Down Expand Up @@ -766,7 +775,14 @@ func_clear(PyFunctionObject *op)
static void
func_dealloc(PyFunctionObject *op)
{
assert(Py_REFCNT(op) == 0);
Py_SET_REFCNT(op, 1);
handle_func_event(PyFunction_EVENT_DESTROY, op, NULL);
if (Py_REFCNT(op) > 1) {
Py_DECREF(op);
return;
}
Py_SET_REFCNT(op, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this Py_SET_REFCNT necessary? IIUC this is redundant here otherwise it isn't obvious to me and a comment would be nice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the normal case, assuming the watcher has not taken any new reference to the object, the refcount will be 1 at this point (because we set it to 1 above.) We should set it to 0 before continuing with deallocation, because objects being deallocated should have a refcount of zero.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The watcher must take a reference to any object it is watching, otherwise the refcount is incorrect and we crash and burn.

The GC, or an optimizer, can legitimately assume that any object with a reference count of zero and no weakrefs is to be freed and re-use the memory. That's not going to end well if the watcher still thinks that memory is still a dictionary.

Copy link
Member Author

@carljm carljm Mar 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@markshannon I think your comment is more relevant to the discussion below (about why to have dealloc watchers at all) than it is to this thread, so I'll reply below.

The answer to @kumaraditya303's question here is that this Py_SET_REFCNT(op, 0) is the decref that has to be paired with the incref Py_SET_REFCNT(op, 1); that occurs on line 799. We use explicit Py_SET_REFCNT instead of Py_INCREF and Py_DECREF because Py_DECREF would cause recursive deallocation, and if we use Py_SET_REFCNT for the decref we have to use it for the incref also, otherwise the total refcount tracking will be off.

_PyObject_GC_UNTRACK(op);
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);
Expand Down