Skip to main content

Lib/pickle.py (part 2)

Source:

cpython 3.14 @ ab2d84fe1023/Lib/pickle.py

This annotation covers the unpickling side and custom pickling hooks. See modules_pickle_detail for Pickler.dump, the opcode encoding, and protocol selection.

Map

| Lines | Symbol | Role |
|---|---|---|
| 1-100 | Unpickler.load | Main dispatch loop; reads opcode, calls handler |
| 101-250 | dispatch table | Opcode byte → handler method |
| 251-400 | load_reduce | Handle REDUCE opcode: callable(*args) |
| 401-550 | load_build | Handle BUILD opcode: __setstate__ / __dict__ update |
| 551-700 | persistent_id / persistent_load | Serialize external references by ID |
| 701-900 | __reduce_ex__ protocol | How objects describe themselves for pickling |
| 901-1100 | copyreg dispatch | Registered reduce functions for C types |
| 1101-1800 | Protocol 5 | Out-of-band buffer pickling via PickleBuffer |

Reading

Unpickler.load

# CPython: Lib/pickle.py:1420 Unpickler.load
def load(self):
    """Read a pickled object representation from self.file_object.

    Runs the opcode dispatch loop: each iteration reads one opcode byte
    and calls the handler registered for it in ``self.dispatch``.

    Returns:
        The ``value`` attribute of the ``_Stop`` exception that ends the
        loop.

    Raises:
        EOFError: If a read returns no data before the loop is stopped.
    """
    dispatch = self.dispatch
    try:
        while True:
            key = self.read(1)
            if not key:
                # An empty read means the input ran out before _Stop was raised.
                raise EOFError
            assert isinstance(key, bytes_types)
            # Indexing the one-byte read gives the opcode as an int, which is
            # the key type used by the dispatch table.
            dispatch[key[0]](self) # call opcode handler
    except _Stop as stopinst:
        # The loop has no other exit: it ends only when _Stop propagates out
        # of a handler (presumably the STOP opcode handler, not shown here).
        return stopinst.value

The pickle protocol is a stack machine. Each opcode handler pushes/pops from self.stack or manipulates self.memo.

load_reduce

# CPython: Lib/pickle.py:1580 load_reduce
def load_reduce(self):
    """Handle REDUCE: replace the callable on the stack with its call result.

    Expects the arguments on top of ``self.stack`` with the callable
    directly beneath them.
    """
    stack = self.stack
    args = stack.pop()
    # The callable is read but not popped; its slot is overwritten in place
    # with the result, so the stack depth drops by exactly one.
    func = stack[-1]
    stack[-1] = func(*args)
# Register the handler under the integer value of the REDUCE opcode byte.
dispatch[REDUCE[0]] = load_reduce

REDUCE pops the argument tuple off the stack, then replaces the callable beneath it (now the top of the stack) with the result of callable(*args). This is the core reconstruction mechanism for most Python objects.

load_build

# CPython: Lib/pickle.py:1610 load_build
def load_build(self):
    """Handle BUILD: apply the state on top of the stack to the object below.

    If the object provides ``__setstate__`` it is called with the state and
    nothing else is done.  Otherwise a truthy state is merged into the
    object's ``__dict__``; when the state is a 2-tuple, its second item is
    treated as a mapping whose entries are applied with ``setattr``.
    """
    stack = self.stack
    state = stack.pop()
    # The instance stays on the stack; it is only mutated here.
    inst = stack[-1]
    # MISSING (defined outside this excerpt) serves as the "attribute absent"
    # default, so any value actually found for __setstate__ is used.
    setstate = getattr(inst, '__setstate__', MISSING)
    if setstate is not MISSING:
        setstate(state)
        return
    # No __setstate__: update __dict__ directly
    slotstate = None
    if isinstance(state, tuple) and len(state) == 2:
        # A 2-tuple state is split into (dict state, slotstate); the second
        # part is presumably meant for __slots__ attributes, going by its name.
        state, slotstate = state
    if state:
        inst_dict = inst.__dict__
        intern = sys.intern
        for k, v in state.items():
            # Only exact str keys are interned; any other key (including str
            # subclasses) is stored unchanged.
            if type(k) is str:
                inst_dict[intern(k)] = v
            else:
                inst_dict[k] = v
    if slotstate:
        # These entries go through setattr rather than the instance __dict__.
        for k, v in slotstate.items():
            setattr(inst, k, v)
# Register the handler under the integer value of the BUILD opcode byte.
dispatch[BUILD[0]] = load_build

__reduce_ex__

# CPython: Lib/pickle.py:780 Pickler.save_reduce
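# __reduce_ex__(protocol) returns a string (a global name) or a tuple of
# two to six items:
# (callable, args)
# (callable, args, state)
# (callable, args, state, list_items)
# (callable, args, state, list_items, dict_items)
# (callable, args, state, list_items, dict_items, state_setter)

Every element after (callable, args) is optional. state is passed to __setstate__ (or, when the object has no __setstate__, merged into its __dict__, as load_build shows); list_items and dict_items are iterators of items to append and of key-value pairs to store, intended mainly for list and dict subclasses; state_setter, if given, is a callable invoked as state_setter(obj, state) in place of __setstate__.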

persistent_id

# CPython: Lib/pickle.py:830 persistent_id
# Override in a Pickler subclass to externalize objects:
class MyPickler(Pickler):
    """Pickler that externalizes MyDBRecord objects instead of pickling them."""

    def persistent_id(self, obj):
        """Return a persistent ID for *obj*, or None to pickle it normally.

        MyDBRecord instances are represented by the tuple
        ``('record', obj.id)``; every other object yields None.
        """
        if not isinstance(obj, MyDBRecord):
            return None # None means: pickle normally
        return ('record', obj.id)

class MyUnpickler(Unpickler):
    """Unpickler that resolves ('record', key) persistent IDs through ``db``."""

    def persistent_load(self, pid):
        """Return the object referenced by the persistent ID *pid*.

        *pid* is unpacked as a ``(type_tag, key)`` pair.  Only the
        ``'record'`` tag is understood; it is resolved with ``db.get(key)``.

        Raises:
            UnpicklingError: If the tag is anything other than ``'record'``.
        """
        type_tag, key = pid
        if type_tag != 'record':
            # Fail loudly rather than return None for an unknown reference.
            raise UnpicklingError(f'unsupported pid: {pid!r}')
        return db.get(key)

gopy notes

pickle is pure Python but accelerated by _pickle (C). gopy uses the Python implementation with the C module loaded as a speedup when available. __reduce_ex__ dispatch uses type.__reduce_ex__ which calls object.__reduce_ex__ in objects/object.go. persistent_id / persistent_load are virtual method overrides handled by gopy's Python subclassing mechanism.