Skip to main content

Objects/bytesobject.c (part 5)

Source:

cpython 3.14 @ ab2d84fe1023/Objects/bytesobject.c

This annotation covers search and manipulation methods. See objects_bytesobject4_detail for bytes.__new__, bytes.__repr__, and comparison.

Map

| Lines | Symbol | Role |
|---|---|---|
| 1-80 | Bytes free list | Reuse single-byte PyBytesObject instances |
| 81-180 | bytes.find / bytes.index | Substring search using Boyer-Moore-Horspool |
| 181-280 | bytes.split / bytes.rsplit | Split on separator or whitespace |
| 281-380 | bytes.join | Concatenate iterable with separator |
| 381-500 | bytes.replace | Replace all occurrences |

Reading

Bytes free list

// CPython: Objects/bytesobject.c:28 characters
/* Cache of single-byte bytes objects, indexed by the byte's value (see the
   characters[c] lookup below).  A slot is NULL when no object is cached for
   that byte; where slots get filled in is not shown in this excerpt. */
static PyBytesObject *characters[256];
/* Cached empty bytes object; NULL when it has not been set. */
static PyBytesObject *nullstring;

/*
 * Return a bytes object for the `size` bytes starting at `str`.
 *
 * Two fast paths avoid allocation:
 *   - size == 0 and the empty object is cached: return a new reference to it;
 *   - size == 1, str is non-NULL, and that byte's slot is populated: return a
 *     new reference to the cached single-byte object.
 * Every other request falls through to the (elided) allocation path.
 */
PyObject *
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
{
if (size == 0 && nullstring != NULL) {
return Py_NewRef(nullstring);
}
if (size == 1 && str != NULL) {
/* Convert to unsigned char so the value is a valid 0..255 array index
   even where plain char is signed. */
unsigned char c = (unsigned char)str[0];
if (characters[c] != NULL) {
return Py_NewRef(characters[c]);
}
}
/* allocate new bytes object */
...
}

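Single-byte and empty bytes objects are cached singletons: constructing b'\x41' returns the same object each time, so `is` comparisons between equal single-byte values evaluate to True in CPython (an implementation detail that portable code should not rely on). This is the bytes counterpart of the small-integer cache.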

bytes.find

// CPython: Objects/bytesobject.c:1620 bytes_find_internal
/*
 * Shared search helper behind the bytes substring-search methods.
 *
 * self: the bytes object to search in.
 * args: argument tuple parsed with "y*|nn": a bytes-like needle followed by
 *       optional start and end offsets (defaults 0 and PY_SSIZE_T_MAX).
 * dir:  search direction, forwarded unchanged to _Py_FindObjectEx.
 *
 * Returns the value produced by _Py_FindObjectEx, or -2 with a Python
 * exception set when the arguments cannot be parsed.  -2 is used so that
 * callers can tell an argument error apart from the -1 "not found" result.
 */
static Py_ssize_t
bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
{
    Py_buffer subbuf;
    Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

    /* On failure subbuf is left unfilled, so it must be neither read nor
       released: bail out before touching it. */
    if (!PyArg_ParseTuple(args, "y*|nn:find", &subbuf, &start, &end)) {
        return -2;
    }

    Py_ssize_t result = _Py_FindObjectEx(
        (const char *)PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        (const char *)subbuf.buf, subbuf.len, start, end, dir);

    /* "y*" acquired a buffer view; release it on every path past the parse. */
    PyBuffer_Release(&subbuf);
    return result;
}

bytes.find delegates to _Py_FindObjectEx, which dispatches to a fast SIMD path for single-byte needles and to Boyer-Moore-Horspool for longer ones. bytes.index calls the same function but raises ValueError when the result is -1 (substring not found).

bytes.split

// CPython: Objects/bytesobject.c:1820 bytes_split
/*
 * bytes.split([sep[, maxsplit]])
 *
 * args is parsed with "|On": an optional separator object (default None)
 * and an optional maxsplit (default -1).
 *
 * With sep == None the work is delegated to split_whitespace(); otherwise
 * sep is accessed through the buffer protocol and split_char_or_str() does
 * the work.
 *
 * Returns whatever the chosen helper returns, or NULL with an exception set
 * when the arguments cannot be parsed or sep does not export a buffer.
 */
static PyObject *
bytes_split(PyBytesObject *self, PyObject *args)
{
    Py_ssize_t len = PyBytes_GET_SIZE(self);
    PyObject *subobj = Py_None;
    /* The "n" format unit stores a Py_ssize_t; an int target would be
       overrun on platforms where Py_ssize_t is wider than int. */
    Py_ssize_t maxsplit = -1;

    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) {
        return NULL;
    }

    if (subobj == Py_None) {
        return split_whitespace((const char *)PyBytes_AS_STRING(self), len, maxsplit);
    }

    Py_buffer vsub;
    /* On failure vsub is not a valid view, so it must not be read or
       released. */
    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) {
        return NULL;
    }

    PyObject *list = split_char_or_str(
        PyBytes_AS_STRING(self), len, vsub.buf, vsub.len, maxsplit);
    PyBuffer_Release(&vsub);
    return list;
}

b'a b'.split() takes the None path: it splits on runs of whitespace and discards leading and trailing whitespace. b'a:b'.split(b':') takes the buffer path. The maxsplit parameter limits the number of splits, counting from the left.

bytes.join

// CPython: Objects/bytesobject.c:1940 bytes_join
/*
 * bytes.join(iterable)
 *
 * Two-pass join: the first pass adds up the separator and item lengths so
 * that the result can be allocated once; the second pass (elided in this
 * excerpt) copies the data into it.
 *
 * Returns the joined bytes object, or NULL with an exception set when
 * `iterable` cannot be materialised as a sequence or the result cannot be
 * allocated.  An empty iterable yields an empty bytes object.
 */
static PyObject *
bytes_join(PyBytesObject *self, PyObject *iterable)
{
    Py_ssize_t seplen = PyBytes_GET_SIZE(self);

    PyObject *seq = PySequence_Fast(iterable, "can only join an iterable");
    if (seq == NULL) {
        return NULL;
    }

    Py_ssize_t seqlen = PySequence_Fast_GET_SIZE(seq);
    if (seqlen == 0) {
        /* Without this guard, seplen * (seqlen - 1) below would be negative
           and an invalid size would be requested. */
        Py_DECREF(seq);
        return PyBytes_FromStringAndSize(NULL, 0);
    }

    /* seqlen items are separated by seqlen - 1 separators. */
    Py_ssize_t total = seplen * (seqlen - 1);
    /* NOTE(review): PyBytes_GET_SIZE assumes every item is an exact bytes
       object, and neither the product above nor this sum is checked for
       Py_ssize_t overflow -- confirm whether that is handled elsewhere. */
    for (Py_ssize_t i = 0; i < seqlen; i++) {
        total += PyBytes_GET_SIZE(PySequence_Fast_GET_ITEM(seq, i));
    }

    PyObject *res = PyBytes_FromStringAndSize(NULL, total);
    if (res == NULL) {
        Py_DECREF(seq);
        return NULL;
    }
    /* second pass: copy */
    /* NOTE(review): the elided copy pass is presumably also where the
       reference to seq is released -- verify before relying on it. */
    ...
    return res;
}

bytes.join performs a two-pass algorithm: first compute the total length, then allocate once and copy. This avoids quadratic behavior from repeated concatenation.

bytes.replace

// CPython: Objects/bytesobject.c:2080 bytes_replace
/*
 * bytes.replace(old, new[, count])
 *
 * args is parsed with "y*y*|n": two bytes-like objects (the pattern and its
 * replacement) and an optional count (default -1).  The actual work is
 * delegated to stringlib_replace().
 *
 * Returns the result of stringlib_replace(), or NULL with an exception set
 * when the arguments cannot be parsed.
 */
static PyObject *
bytes_replace(PyBytesObject *self, PyObject *args)
{
    Py_buffer from_buf, to_buf;
    /* The "n" format unit stores a Py_ssize_t; an int target would be
       overrun on platforms where Py_ssize_t is wider than int. */
    Py_ssize_t count = -1;

    /* On failure neither buffer may be read or released here. */
    if (!PyArg_ParseTuple(args, "y*y*|n:replace", &from_buf, &to_buf, &count)) {
        return NULL;
    }

    PyObject *result = stringlib_replace(
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        from_buf.buf, from_buf.len,
        to_buf.buf, to_buf.len, count);

    /* Both views were acquired by "y*" and must be released on every path
       past a successful parse. */
    PyBuffer_Release(&from_buf);
    PyBuffer_Release(&to_buf);
    return result;
}

b'aaa'.replace(b'a', b'bb') calls stringlib_replace which handles replacements that are longer, shorter, or the same length. When count=-1 all occurrences are replaced.

gopy notes

The single-byte free list is objects.BytesSingletons in objects/bytes.go, a 256-element array initialized at startup. bytes.find uses bytes.Index. bytes.split dispatches to bytes.Fields for whitespace or bytes.SplitN. bytes.join uses bytes.Join. bytes.replace uses bytes.Replace.