Objects/bytesobject.c (part 5)
Source:
cpython 3.14 @ ab2d84fe1023/Objects/bytesobject.c
This annotation covers search and manipulation methods. See objects_bytesobject4_detail for bytes.__new__, bytes.__repr__, and comparison.
Map
| Lines | Symbol | Role |
|---|---|---|
| 1-80 | Bytes free list | Reuse single-byte PyBytesObject instances |
| 81-180 | bytes.find / bytes.index | Substring search using Boyer-Moore-Horspool |
| 181-280 | bytes.split / bytes.rsplit | Split on separator or whitespace |
| 281-380 | bytes.join | Concatenate iterable with separator |
| 381-500 | bytes.replace | Replace all occurrences |
Reading
Bytes free list
// CPython: Objects/bytesobject.c:28 characters
/* Cache of one-byte bytes objects, indexed by the byte value (0-255).
   A slot is NULL while no object is cached for that byte; the code that
   fills the slots is not part of this excerpt. */
static PyBytesObject *characters[256];
/* Cached empty bytes object, or NULL when none is cached. It is handed out
   by PyBytes_FromStringAndSize() for size == 0. */
static PyBytesObject *nullstring;
/* Return a bytes object for the `size` bytes starting at `str`.
 *
 * Fast paths visible in this excerpt:
 *   - size == 0 returns a new reference to the cached `nullstring`, if set;
 *   - size == 1 with a non-NULL `str` returns a new reference to the cached
 *     `characters[]` entry for that byte, if set.
 * Every other case falls through to the allocation path, which is elided
 * here.
 */
PyObject *
PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
{
if (size == 0 && nullstring != NULL) {
return Py_NewRef(nullstring);
}
/* With str == NULL there is no byte to look up, so the one-byte cache is
   consulted only when a source buffer was supplied. */
if (size == 1 && str != NULL) {
/* Convert to unsigned char so the index is 0..255 even where plain char
   is signed. */
unsigned char c = (unsigned char)str[0];
if (characters[c] != NULL) {
return Py_NewRef(characters[c]);
}
}
/* allocate new bytes object */
...
}
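Single-byte and empty bytes objects are cached singletons. b'\x41' always evaluates to the same object, so identity comparisons (the is operator) between equal single-byte values are always True; this is a CPython implementation detail, and portable code should compare with ==. This is the bytes counterpart of the small-integer cache.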
bytes.find
// CPython: Objects/bytesobject.c:1620 bytes_find_internal
/* Shared implementation behind the bytes substring-search methods.
 *
 * self: the bytes object being searched.
 * args: argument tuple parsed with "y*|nn": the needle (any object that
 *       supports the buffer protocol) plus optional start and end offsets.
 * dir:  forwarded unchanged to _Py_FindObjectEx (presumably selects the
 *       search direction -- confirm against that helper).
 *
 * Returns whatever _Py_FindObjectEx returns, or -2 with an exception set
 * when the arguments cannot be parsed. Callers must check for -2 before
 * interpreting the result.
 */
static Py_ssize_t
bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
{
    Py_buffer subbuf;
    Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

    /* On failure subbuf is left unfilled, so it must be neither read nor
       released; bail out before touching it. */
    if (!PyArg_ParseTuple(args, "y*|nn:find", &subbuf, &start, &end)) {
        return -2;
    }

    Py_ssize_t result = _Py_FindObjectEx(
        (const char *)PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        (const char *)subbuf.buf, subbuf.len, start, end, dir);

    /* "y*" acquired a buffer view; it has to be released on every path
       after a successful parse. */
    PyBuffer_Release(&subbuf);
    return result;
}
bytes.find delegates to _Py_FindObjectEx, which dispatches to a fast SIMD path for single-byte needles and to Boyer-Moore-Horspool for longer ones. bytes.index calls the same function but raises ValueError when the result is -1 (substring not found).
bytes.split
// CPython: Objects/bytesobject.c:1820 bytes_split
/* bytes.split([sep[, maxsplit]])
 *
 * args: argument tuple parsed with "|On": an optional separator object and
 *       an optional maximum split count (default -1).
 *
 * With no separator (or None) the work is done by split_whitespace();
 * otherwise the separator is viewed through the buffer protocol and handed
 * to split_char_or_str().
 *
 * Returns the object produced by the chosen helper, or NULL with an
 * exception set if argument parsing or buffer acquisition fails.
 */
static PyObject *
bytes_split(PyBytesObject *self, PyObject *args)
{
    Py_ssize_t len = PyBytes_GET_SIZE(self);
    PyObject *subobj = Py_None;
    /* The "n" format unit stores a Py_ssize_t; declaring this as int would
       let PyArg_ParseTuple write past the variable on LP64 platforms. */
    Py_ssize_t maxsplit = -1;

    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) {
        return NULL;
    }

    if (subobj == Py_None) {
        return split_whitespace((const char *)PyBytes_AS_STRING(self), len, maxsplit);
    }

    Py_buffer vsub;
    /* A failed PyObject_GetBuffer leaves vsub unusable, so stop before it is
       read or released. */
    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) {
        return NULL;
    }

    PyObject *list = split_char_or_str(
        PyBytes_AS_STRING(self), len, vsub.buf, vsub.len, maxsplit);
    PyBuffer_Release(&vsub);
    return list;
}
b'a b'.split() splits on runs of ASCII whitespace and discards leading/trailing whitespace (the None path). b'a:b'.split(b':') uses the buffer path. The maxsplit parameter limits the number of splits, counted from the left for split and from the right for rsplit; -1 (the default) means no limit.
bytes.join
// CPython: Objects/bytesobject.c:1940 bytes_join
/* bytes.join(iterable)
 *
 * self:     the separator.
 * iterable: the items to concatenate; it is materialised with
 *           PySequence_Fast() so it can be walked twice.
 *
 * Pass 1 adds up the separator and item lengths so the result can be
 * allocated once; pass 2 (elided in this excerpt) copies the data.
 *
 * Returns a new bytes object, or NULL with an exception set when the
 * argument is not iterable, an item is not a bytes object, the total length
 * overflows Py_ssize_t, or allocation fails.
 *
 * NOTE(review): as excerpted, only bytes items are measured (via
 * PyBytes_GET_SIZE). Accepting other buffer-protocol objects would need a
 * PyObject_GetBuffer-based path -- confirm against the full source.
 */
static PyObject *
bytes_join(PyBytesObject *self, PyObject *iterable)
{
    Py_ssize_t seplen = PyBytes_GET_SIZE(self);
    PyObject *seq = PySequence_Fast(iterable, "can only join an iterable");
    if (seq == NULL) {
        return NULL;
    }

    Py_ssize_t seqlen = PySequence_Fast_GET_SIZE(seq);
    if (seqlen == 0) {
        /* Without this guard seplen * (seqlen - 1) would be negative. */
        Py_DECREF(seq);
        return PyBytes_FromStringAndSize("", 0);
    }

    /* Separators: one between each pair of adjacent items. */
    if (seplen != 0 && seqlen - 1 > PY_SSIZE_T_MAX / seplen) {
        PyErr_SetString(PyExc_OverflowError, "join() result is too long");
        Py_DECREF(seq);
        return NULL;
    }
    Py_ssize_t total = seplen * (seqlen - 1);

    for (Py_ssize_t i = 0; i < seqlen; i++) {
        PyObject *item = PySequence_Fast_GET_ITEM(seq, i);
        /* PyBytes_GET_SIZE does no type checking of its own. */
        if (!PyBytes_Check(item)) {
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected bytes, %.80s found",
                         i, Py_TYPE(item)->tp_name);
            Py_DECREF(seq);
            return NULL;
        }
        Py_ssize_t itemlen = PyBytes_GET_SIZE(item);
        if (itemlen > PY_SSIZE_T_MAX - total) {
            PyErr_SetString(PyExc_OverflowError, "join() result is too long");
            Py_DECREF(seq);
            return NULL;
        }
        total += itemlen;
    }

    PyObject *res = PyBytes_FromStringAndSize(NULL, total);
    if (res == NULL) {
        Py_DECREF(seq);
        return NULL;
    }
    /* second pass: copy */
    ...
    /* PySequence_Fast returned a new reference; drop it once the items are
       no longer needed. */
    Py_DECREF(seq);
    return res;
}
bytes.join performs a two-pass algorithm: first compute the total length, then allocate once and copy. This avoids quadratic behavior from repeated concatenation.
bytes.replace
// CPython: Objects/bytesobject.c:2080 bytes_replace
/* bytes.replace(old, new[, count])
 *
 * args: argument tuple parsed with "y*y*|n": the pattern to search for, its
 *       replacement (both any buffer-protocol object), and an optional
 *       count (default -1).
 *
 * The actual substitution is performed by stringlib_replace().
 *
 * Returns the object produced by stringlib_replace(), or NULL with an
 * exception set when the arguments cannot be parsed.
 */
static PyObject *
bytes_replace(PyBytesObject *self, PyObject *args)
{
    Py_buffer from_buf, to_buf;
    /* The "n" format unit stores a Py_ssize_t; declaring this as int would
       let PyArg_ParseTuple write past the variable on LP64 platforms. */
    Py_ssize_t count = -1;

    /* On failure the buffers are not ours to use or release. */
    if (!PyArg_ParseTuple(args, "y*y*|n:replace", &from_buf, &to_buf, &count)) {
        return NULL;
    }

    PyObject *result = stringlib_replace(
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        from_buf.buf, from_buf.len,
        to_buf.buf, to_buf.len, count);

    /* Both views were acquired by "y*" and must be released on every path
       after a successful parse. */
    PyBuffer_Release(&from_buf);
    PyBuffer_Release(&to_buf);
    return result;
}
b'aaa'.replace(b'a', b'bb') calls stringlib_replace which handles replacements that are longer, shorter, or the same length. When count=-1 all occurrences are replaced.
gopy notes
The single-byte free list is objects.BytesSingletons in objects/bytes.go, a 256-element array initialized at startup. bytes.find uses bytes.Index. bytes.split dispatches to bytes.Fields for whitespace or bytes.SplitN. bytes.join uses bytes.Join. bytes.replace uses bytes.Replace.