diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 5d1f44988a6df1..e473110eca7415 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -144,7 +144,6 @@ typedef struct _optimization_stats { uint64_t unknown_callee; uint64_t trace_immediately_deopts; uint64_t executors_invalidated; - uint64_t fitness_terminated_traces; UOpStats opcode[PYSTATS_MAX_UOP_ID + 1]; uint64_t unsupported_opcode[256]; uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 0cebe1b4b9e995..f76d4f41c55119 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -449,10 +449,6 @@ typedef struct _PyOptimizationConfig { uint16_t side_exit_initial_value; uint16_t side_exit_initial_backoff; - // Trace fitness thresholds - uint16_t fitness_initial; - uint16_t fitness_initial_side; - // Optimization flags bool specialization_enabled; bool uops_optimize_enabled; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 820ee32201c1f8..2986afb142b5d1 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -15,23 +15,6 @@ extern "C" { #include "pycore_optimizer_types.h" #include -/* Default fitness configuration values for trace quality control. - * FITNESS_INITIAL and FITNESS_INITIAL_SIDE can be overridden via - * PYTHON_JIT_FITNESS_INITIAL and PYTHON_JIT_FITNESS_INITIAL_SIDE */ -#define FITNESS_PER_INSTRUCTION 2 -#define FITNESS_BRANCH_BASE 5 -#define FITNESS_INITIAL (FITNESS_PER_INSTRUCTION * 1000) -#define FITNESS_INITIAL_SIDE (FITNESS_INITIAL / 2) -#define FITNESS_BACKWARD_EDGE (FITNESS_INITIAL / 10) - -/* Exit quality constants for fitness-based trace termination. - * Higher values mean better places to stop the trace. */ - -#define EXIT_QUALITY_DEFAULT 200 -#define EXIT_QUALITY_CLOSE_LOOP (4 * EXIT_QUALITY_DEFAULT) -#define EXIT_QUALITY_ENTER_EXECUTOR (2 * EXIT_QUALITY_DEFAULT + 100) -#define EXIT_QUALITY_SPECIALIZABLE (EXIT_QUALITY_DEFAULT / 4) - typedef struct _PyJitUopBuffer { _PyUOpInstruction *start; @@ -118,8 +101,7 @@ typedef struct _PyJitTracerPreviousState { } _PyJitTracerPreviousState; typedef struct _PyJitTracerTranslatorState { - int32_t fitness; // Current trace fitness, starts high, decrements - int frame_depth; // Current inline depth (0 = root frame) + int jump_backward_seen; } _PyJitTracerTranslatorState; typedef struct _PyJitTracerState { diff --git a/Python/optimizer.c b/Python/optimizer.c index c7a6b7e746545c..f09bf778587b12 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -549,6 +549,8 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = { }; +#define CONFIDENCE_RANGE 1000 +#define CONFIDENCE_CUTOFF 333 #ifdef Py_DEBUG #define DPRINTF(level, ...) \ @@ -596,46 +598,6 @@ add_to_trace( ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) -/* Compute branch fitness penalty based on how likely the traced path is. - * The penalty is small when the traced path is common, large when rare. - * A branch that historically goes the other way gets a heavy penalty. */ -static inline int -compute_branch_penalty(uint16_t history, bool branch_taken) -{ - int taken_count = _Py_popcount32((uint32_t)history); - int on_trace_count = branch_taken ? taken_count : 16 - taken_count; - int off_trace = 16 - on_trace_count; - /* Linear scaling: off_trace ranges from 0 (fully biased our way) - * to 16 (fully biased against us), so the penalty ranges from - * FITNESS_BRANCH_BASE to FITNESS_BRANCH_BASE + 32. */ - return FITNESS_BRANCH_BASE + off_trace * 2; -} - -/* Compute exit quality for the current trace position. - * Higher values mean better places to stop the trace. */ -static inline int32_t -compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode, - const _PyJitTracerState *tracer) -{ - if (target_instr == tracer->initial_state.start_instr || - target_instr == tracer->initial_state.close_loop_instr) { - return EXIT_QUALITY_CLOSE_LOOP; - } - if (target_instr->op.code == ENTER_EXECUTOR) { - return EXIT_QUALITY_ENTER_EXECUTOR; - } - if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) { - return EXIT_QUALITY_SPECIALIZABLE; - } - return EXIT_QUALITY_DEFAULT; -} - -static inline int32_t -compute_frame_penalty(const _PyOptimizationConfig *cfg) -{ - return (int32_t)cfg->fitness_initial / 10 + 1; -} - static int is_terminator(const _PyUOpInstruction *uop) { @@ -675,7 +637,6 @@ _PyJit_translate_single_bytecode_to_trace( _Py_CODEUNIT *this_instr = tracer->prev_state.instr; _Py_CODEUNIT *target_instr = this_instr; uint32_t target = 0; - int end_trace_opcode = _DEOPT; target = Py_IsNone((PyObject *)old_code) ? (uint32_t)(target_instr - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR) @@ -773,14 +734,16 @@ _PyJit_translate_single_bytecode_to_trace( DPRINTF(2, "Unsupported: oparg too large\n"); unsupported: { + // Rewind to previous instruction and replace with _EXIT_TRACE. _PyUOpInstruction *curr = uop_buffer_last(trace); while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) { trace->next--; curr = uop_buffer_last(trace); } + assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2); if (curr->opcode == _SET_IP) { int32_t old_target = (int32_t)uop_get_target(curr); - curr->opcode = end_trace_opcode; + curr->opcode = _DEOPT; curr->format = UOP_FORMAT_TARGET; curr->target = old_target; } @@ -800,23 +763,6 @@ _PyJit_translate_single_bytecode_to_trace( return 1; } - // Fitness-based trace quality check (before reserving space for this instruction) - _PyJitTracerTranslatorState *ts = &tracer->translator_state; - int32_t eq = compute_exit_quality(target_instr, opcode, tracer); - DPRINTF(3, "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n", - _PyOpcode_OpName[opcode], oparg, ts->fitness, eq, ts->frame_depth); - - // Check if fitness is depleted — should we stop the trace? - if (ts->fitness < eq) { - // This is a tracer heuristic rather than normal program control flow, - // so leave operand1 clear and let the resulting side exit increase chain_depth. - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); - OPT_STAT_INC(fitness_terminated_traces); - DPRINTF(2, "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n", - _PyOpcode_OpName[opcode], oparg, ts->fitness, eq); - goto done; - } - // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT trace->end -= 2; @@ -870,12 +816,6 @@ _PyJit_translate_single_bytecode_to_trace( assert(jump_happened ? (next_instr == computed_jump_instr) : (next_instr == computed_next_instr)); uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened]; ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code)); - int bp = compute_branch_penalty(target_instr[1].cache, jump_happened); - tracer->translator_state.fitness -= bp; - DPRINTF(3, " branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n", - bp, target_instr[1].cache, jump_happened, - tracer->translator_state.fitness); - break; } case JUMP_BACKWARD_JIT: @@ -883,9 +823,6 @@ _PyJit_translate_single_bytecode_to_trace( case JUMP_BACKWARD_NO_JIT: case JUMP_BACKWARD: ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); - tracer->translator_state.fitness -= FITNESS_BACKWARD_EDGE; - DPRINTF(3, " backward edge penalty: -%d -> fitness=%d\n", - FITNESS_BACKWARD_EDGE, tracer->translator_state.fitness); _Py_FALLTHROUGH; case JUMP_BACKWARD_NO_INTERRUPT: { @@ -1008,44 +945,6 @@ _PyJit_translate_single_bytecode_to_trace( assert(next->op.code == STORE_FAST); operand = next->op.arg; } - else if (uop == _PUSH_FRAME) { - _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; - ts_depth->frame_depth++; - if (ts_depth->frame_depth >= MAX_ABSTRACT_FRAME_DEPTH) { - // The optimizer can't handle frames this deep, - // so there's no point continuing the trace. - DPRINTF(2, "Unsupported: frame depth %d >= MAX_ABSTRACT_FRAME_DEPTH\n", - ts_depth->frame_depth); - end_trace_opcode = _EXIT_TRACE; - goto unsupported; - } - int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config); - int32_t cost = frame_penalty * ts_depth->frame_depth; - ts_depth->fitness -= cost; - DPRINTF(3, " _PUSH_FRAME: depth=%d, penalty=-%d (per_frame=%d) -> fitness=%d\n", - ts_depth->frame_depth, cost, frame_penalty, - ts_depth->fitness); - } - else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { - _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state; - int32_t frame_penalty = compute_frame_penalty(&tstate->interp->opt_config); - if (ts_depth->frame_depth <= 0) { - // Underflow: returning from a frame we didn't enter - ts_depth->fitness -= frame_penalty * 2; - DPRINTF(3, " %s: underflow penalty=-%d -> fitness=%d\n", - _PyOpcode_uop_name[uop], frame_penalty * 2, - ts_depth->fitness); - } - else { - // Reward returning: small inlined calls should be encouraged - ts_depth->fitness += frame_penalty; - DPRINTF(3, " %s: return reward=+%d, depth=%d -> fitness=%d\n", - _PyOpcode_uop_name[uop], frame_penalty, - ts_depth->frame_depth - 1, - ts_depth->fitness); - } - ts_depth->frame_depth = ts_depth->frame_depth <= 0 ? 0 : ts_depth->frame_depth - 1; - } else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) { PyObject *recorded_value = tracer->prev_state.recorded_value; tracer->prev_state.recorded_value = NULL; @@ -1087,13 +986,7 @@ _PyJit_translate_single_bytecode_to_trace( ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); goto done; } - // Update fitness AFTER translation, BEFORE returning to continue tracing. - // This ensures the next iteration's fitness check reflects the cost of - // all instructions translated so far. - tracer->translator_state.fitness -= FITNESS_PER_INSTRUCTION; - DPRINTF(3, " per-insn cost: -%d -> fitness=%d\n", - FITNESS_PER_INSTRUCTION, tracer->translator_state.fitness); - DPRINTF(2, "Trace continuing (fitness=%d)\n", tracer->translator_state.fitness); + DPRINTF(2, "Trace continuing\n"); return 1; done: DPRINTF(2, "Trace done\n"); @@ -1176,17 +1069,6 @@ _PyJit_TryInitializeTracing( assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL)); tracer->initial_state.jump_backward_instr = curr_instr; - // Initialize fitness tracking state - const _PyOptimizationConfig *cfg = &tstate->interp->opt_config; - _PyJitTracerTranslatorState *ts = &tracer->translator_state; - bool is_side_trace = (exit != NULL); - ts->fitness = is_side_trace - ? (int32_t)cfg->fitness_initial_side - : (int32_t)cfg->fitness_initial; - ts->frame_depth = 0; - DPRINTF(3, "Fitness init: %s trace, fitness=%d\n", - is_side_trace ? "side" : "root", ts->fitness); - tracer->is_tracing = true; return 1; } diff --git a/Python/pystate.c b/Python/pystate.c index 78eab7cc7d2459..143175da0f45c7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -635,22 +635,6 @@ init_interpreter(PyInterpreterState *interp, "PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF", SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF); - // Trace fitness configuration - init_policy(&interp->opt_config.fitness_initial, - "PYTHON_JIT_FITNESS_INITIAL", - FITNESS_INITIAL, 100, 10000); - init_policy(&interp->opt_config.fitness_initial_side, - "PYTHON_JIT_FITNESS_INITIAL_SIDE", - FITNESS_INITIAL_SIDE, 50, 5000); - /* The tracer starts at start_instr, so initial fitness must not be below - * the close-loop exit quality or tracing will terminate immediately. */ - if (interp->opt_config.fitness_initial < EXIT_QUALITY_CLOSE_LOOP) { - interp->opt_config.fitness_initial = EXIT_QUALITY_CLOSE_LOOP; - } - if (interp->opt_config.fitness_initial_side < EXIT_QUALITY_CLOSE_LOOP) { - interp->opt_config.fitness_initial_side = EXIT_QUALITY_CLOSE_LOOP; - } - interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF"); interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE"); if (interp != &runtime->_main_interpreter) { diff --git a/Python/pystats.c b/Python/pystats.c index 2fac2db1b738c7..a057ad884566d8 100644 --- a/Python/pystats.c +++ b/Python/pystats.c @@ -274,7 +274,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence); fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee); fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated); - fprintf(out, "Optimization fitness terminated: %" PRIu64 "\n", stats->fitness_terminated_traces); print_histogram(out, "Trace length", stats->trace_length_hist); print_histogram(out, "Trace run length", stats->trace_run_length_hist);