Skip to content

Tier 2 cleanups and tweaks #115534

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 85 additions & 72 deletions Lib/test/test_capi/test_opt.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Lib/test/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ class TestOptimizer(MonitoringTestBase, unittest.TestCase):
def setUp(self):
import _testinternalcapi
self.old_opt = _testinternalcapi.get_optimizer()
opt = _testinternalcapi.get_counter_optimizer()
opt = _testinternalcapi.new_counter_optimizer()
_testinternalcapi.set_optimizer(opt)
super(TestOptimizer, self).setUp()

Expand Down
8 changes: 4 additions & 4 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -960,13 +960,13 @@ iframe_getlasti(PyObject *self, PyObject *frame)
}

static PyObject *
get_counter_optimizer(PyObject *self, PyObject *arg)
new_counter_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewCounter();
}

static PyObject *
get_uop_optimizer(PyObject *self, PyObject *arg)
new_uop_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewUOpOptimizer();
}
Expand Down Expand Up @@ -1711,8 +1711,8 @@ static PyMethodDef module_functions[] = {
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
{"set_optimizer", set_optimizer, METH_O, NULL},
{"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL},
{"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
{"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL},
{"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL},
{"new_uop_optimizer", new_uop_optimizer, METH_NOARGS, NULL},
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
{"invalidate_executors", invalidate_executors, METH_O, NULL},
{"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),
Expand Down
61 changes: 40 additions & 21 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,10 @@ static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = {

extern const struct _PyCode_DEF(8) _Py_InitCleanup;

extern const char *_PyUOpName(int index);
#ifdef Py_DEBUG
extern void _PyUOpPrint(const _PyUOpInstruction *uop);
#endif


/* Disable unused label warnings. They are handy for debugging, even
if computed gotos aren't used. */
Expand Down Expand Up @@ -1006,14 +1009,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
for (;;) {
uopcode = next_uop->opcode;
DPRINTF(3,
"%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n",
(int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace)),
_PyUOpName(uopcode),
next_uop->oparg,
next_uop->operand,
next_uop->target,
#ifdef Py_DEBUG
if (lltrace >= 3) {
printf("%4d uop: ", (int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace)));
_PyUOpPrint(next_uop);
printf(" stack_level=%d\n",
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
}
#endif
next_uop++;
OPT_STAT_INC(uops_executed);
UOP_STAT_INC(uopcode, execution_count);
Expand All @@ -1028,9 +1031,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
default:
#ifdef Py_DEBUG
{
fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n",
next_uop[-1].opcode, next_uop[-1].oparg, next_uop[-1].operand,
(int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace) - 1));
printf("Unknown uop: ");
_PyUOpPrint(&next_uop[-1]);
printf(" @ %d\n", (int)(next_uop - current_executor->trace - 1));
Py_FatalError("Unknown uop");
}
#else
Expand Down Expand Up @@ -1058,10 +1061,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
DPRINTF(2, "Error: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
(int)(next_uop - current_executor->trace - 1),
_PyOpcode_OpName[frame->instr_ptr->op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("Error: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(" @ %d -> %s]\n",
(int)(next_uop - current_executor->trace - 1),
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->return_offset = 0; // Don't leave this random
_PyFrame_SetStackPointer(frame, stack_pointer);
Expand All @@ -1072,9 +1080,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
// Jump here from DEOPT_IF()
deoptimize:
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
_PyOpcode_OpName[next_instr->op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("DEOPT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(" -> %s]\n",
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(uopcode, miss);
Py_DECREF(current_executor);
Expand All @@ -1088,9 +1101,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
uint32_t exit_index = next_uop[-1].exit_index;
assert(exit_index < current_executor->exit_count);
_PyExitData *exit = &current_executor->exits[exit_index];
DPRINTF(2, "SIDE EXIT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", exit %u, temp %d, target %d -> %s]\n",
uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, exit_index, exit->temperature,
exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
exit_index, exit->temperature, exit->target,
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
Py_INCREF(exit->executor);
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_TWO(exit->executor);
Expand Down
77 changes: 63 additions & 14 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,22 @@ is_valid(PyObject *self, PyObject *Py_UNUSED(ignored))
return PyBool_FromLong(((_PyExecutorObject *)self)->vm_data.valid);
}

/* Executor method "get_opcode": return the opcode recorded in the
   executor's vm_data as a Python int. Takes no arguments. */
static PyObject *
get_opcode(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    return PyLong_FromUnsignedLong(executor->vm_data.opcode);
}

/* Executor method "get_oparg": return the oparg recorded in the
   executor's vm_data as a Python int. Takes no arguments. */
static PyObject *
get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    return PyLong_FromUnsignedLong(executor->vm_data.oparg);
}

/* Method table for executor objects. Every entry is METH_NOARGS and has
   no docstring (NULL). */
static PyMethodDef executor_methods[] = {
/* Returns a bool built from vm_data.valid. */
{ "is_valid", is_valid, METH_NOARGS, NULL },
/* Returns vm_data.opcode as an int. */
{ "get_opcode", get_opcode, METH_NOARGS, NULL },
/* Returns vm_data.oparg as an int. */
{ "get_oparg", get_oparg, METH_NOARGS, NULL },
/* Sentinel entry terminating the table. */
{ NULL, NULL },
};

Expand All @@ -282,9 +296,30 @@ uop_dealloc(_PyExecutorObject *self) {
/* Look up the name of a uop by its ID.
   Returns the corresponding entry of _PyOpcode_uop_name when index is in
   the range [0, MAX_UOP_ID], and NULL for any index outside that range. */
const char *
_PyUOpName(int index)
{
    if (index >= 0 && index <= MAX_UOP_ID) {
        return _PyOpcode_uop_name[index];
    }
    return NULL;
}

#ifdef Py_DEBUG
/* Print a one-line, human-readable description of a uop to stdout
   (debug builds only; no trailing newline is written).

   Output shape:  NAME (oparg, target=T, operand=0xHEX)
   If _PyUOpName() has no name for the opcode, "<uop N>" is printed in
   place of NAME. */
void
_PyUOpPrint(const _PyUOpInstruction *uop)
{
    const char *name = _PyUOpName(uop->opcode);
    if (name == NULL) {
        printf("<uop %d>", uop->opcode);
    }
    else {
        printf("%s", name);
    }
    /* The operand is shown in hexadecimal; the '#' flag adds the "0x"
       prefix so it cannot be mistaken for one of the decimal fields
       printed next to it. */
    printf(" (%d, target=%d, operand=%#" PRIx64 ")",
           uop->oparg,
           uop->target,
           (uint64_t)uop->operand);
}
#endif

static Py_ssize_t
uop_len(_PyExecutorObject *self)
{
Expand Down Expand Up @@ -312,14 +347,21 @@ uop_item(_PyExecutorObject *self, Py_ssize_t index)
Py_DECREF(oname);
return NULL;
}
PyObject *target = PyLong_FromUnsignedLong(self->trace[index].target);
if (oparg == NULL) {
Py_DECREF(oparg);
Py_DECREF(oname);
return NULL;
}
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
if (operand == NULL) {
Py_DECREF(target);
Py_DECREF(oparg);
Py_DECREF(oname);
return NULL;
}
PyObject *args[3] = { oname, oparg, operand };
return _PyTuple_FromArraySteal(args, 3);
PyObject *args[4] = { oname, oparg, target, operand };
return _PyTuple_FromArraySteal(args, 4);
}

PySequenceMethods uop_as_sequence = {
Expand Down Expand Up @@ -390,19 +432,29 @@ BRANCH_TO_GUARD[4][2] = {
#endif


// Beware: Macro arg order differs from struct member order
#ifdef Py_DEBUG
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
DPRINTF(2, \
" ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \
_PyUOpName(OPCODE), \
(OPARG), \
(uint64_t)(OPERAND), \
TARGET); \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
if (lltrace >= 2) { \
printf("%4d ADD_TO_TRACE: ", trace_length); \
_PyUOpPrint(&trace[trace_length]); \
printf("\n"); \
} \
trace_length++;
#else
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
trace_length++;
#endif

#define INSTR_IP(INSTR, CODE) \
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
Expand Down Expand Up @@ -890,12 +942,9 @@ make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *depende
if (lltrace >= 2) {
printf("Optimized executor (length %d):\n", length);
for (int i = 0; i < length; i++) {
printf("%4d %s(%d, %d, %" PRIu64 ")\n",
i,
_PyUOpName(executor->trace[i].opcode),
executor->trace[i].oparg,
executor->trace[i].target,
executor->trace[i].operand);
printf("%4d OPTIMIZED: ", i);
_PyUOpPrint(&executor->trace[i]);
printf("\n");
}
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)

#ifdef Py_DEBUG
extern const char *_PyUOpName(int index);
static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
static inline int get_lltrace(void) {
char *uop_debug = Py_GETENV(DEBUG_ENV);
Expand Down Expand Up @@ -632,7 +633,7 @@ uop_redundancy_eliminator(
_Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;

DPRINTF(3, "Abstract interpreting %s:%d ",
_PyOpcode_uop_name[opcode],
_PyUOpName(opcode),
oparg);
switch (opcode) {
#include "tier2_redundancy_eliminator_cases.c.h"
Expand Down
12 changes: 4 additions & 8 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <stdlib.h> // rand()

extern const char *_PyUOpName(int index);

/* For guidance on adding or extending families of instructions see
* ./adaptive.md
Expand Down Expand Up @@ -246,17 +247,12 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
stats->optimizer_failure_reason_no_memory);

const char* const* names;
for (int i = 0; i < 512; i++) {
if (i < 256) {
names = _PyOpcode_OpName;
} else {
names = _PyOpcode_uop_name;
}
for (int i = 0; i <= MAX_UOP_ID; i++) {
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
}
if (stats->opcode[i].miss) {
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", names[i], stats->opcode[i].miss);
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
}
}

Expand Down
4 changes: 4 additions & 0 deletions Tools/scripts/summarize_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def load_raw_data(input: Path) -> RawData:
file=sys.stderr,
)
continue
# Hack to handle older data files where some uops
# are missing an underscore prefix in their name
if key.startswith("uops[") and key[5:6] != "_":
key = "uops[_" + key[5:]
stats[key.strip()] += int(value)
stats["__nfiles__"] += 1

Expand Down