Skip to main content

Objects/unicodeobject.c (part 4)

Source:

cpython 3.14 @ ab2d84fe1023/Objects/unicodeobject.c

This annotation covers prefix/suffix matching, encoding, and the str.format mini-language. See parts 1-3 for construction, find/replace, split/join, and strip.

Map

| Lines | Symbol | Role |
| --- | --- | --- |
| 1-200 | unicode_startswith, unicode_endswith | Prefix/suffix check, tuple support |
| 201-400 | unicode_count | Count non-overlapping occurrences |
| 401-600 | unicode_encode | str.encode(encoding, errors) |
| 601-1000 | unicode_format | str.format() — parse format string, substitute fields |
| 1001-1300 | _PyUnicode_FormatLong | Integer format in str.format (d, x, o, b) |
| 1301-1600 | _PyUnicode_FormatFloat | Float format in str.format (f, e, g, %) |

Reading

startswith with tuple

// CPython: Objects/unicodeobject.c:88 unicode_startswith
/*
 * str.startswith(prefix[, start[, end]])
 *
 * args carries the prefix object followed by the optional start/end bounds
 * (defaults: 0 and PY_SSIZE_T_MAX).  The prefix may be a single object or a
 * tuple of candidates; with a tuple, the first candidate that matches wins.
 *
 * Returns a new reference to True or False, or NULL with an exception
 * pending when the arguments cannot be parsed or a match attempt fails.
 */
static PyObject *
unicode_startswith(PyObject *self, PyObject *args)
{
    PyObject *subobj;
    Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

    /* subobj is only assigned on success, so it must not be read if
       parsing fails. */
    if (!PyArg_ParseTuple(args, "O|nn:startswith", &subobj, &start, &end)) {
        return NULL;
    }

    if (PyTuple_Check(subobj)) {
        /* Check each element of the tuple */
        Py_ssize_t count = PyTuple_GET_SIZE(subobj);
        for (Py_ssize_t i = 0; i < count; i++) {
            PyObject *substring = PyTuple_GET_ITEM(subobj, i);
            Py_ssize_t matched = tailmatch(self, substring, start, end, -1);
            /* NOTE(review): assumes tailmatch signals an error with a
               negative value and a pending exception -- confirm against
               its definition. */
            if (matched < 0) {
                return NULL;
            }
            if (matched > 0) {
                Py_RETURN_TRUE;
            }
        }
        /* No candidate matched. */
        Py_RETURN_FALSE;
    }

    Py_ssize_t matched = tailmatch(self, subobj, start, end, -1);
    /* A negative result must not reach PyBool_FromLong: any non-zero value
       there yields True. */
    if (matched < 0) {
        return NULL;
    }
    return PyBool_FromLong(matched > 0);
}

'hello'.startswith(('he', 'wo')) returns True — any match in the tuple suffices.

str.encode

// CPython: Objects/unicodeobject.c:490 unicode_encode
/*
 * str.encode(encoding, errors)
 *
 * Both arguments are optional C strings, accepted positionally or by
 * keyword.  A missing encoding falls back to "utf-8" and a missing errors
 * handler to "strict".
 *
 * Returns whatever PyUnicode_AsEncodedString produces for self, or NULL
 * with an exception pending when the arguments cannot be parsed.
 */
static PyObject *
unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs)
{
    const char *encoding = NULL, *errors = NULL;

    /* kwlist is defined outside this excerpt.  Stop on a parse failure
       instead of encoding with an exception already pending. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", kwlist,
                                     &encoding, &errors)) {
        return NULL;
    }

    if (encoding == NULL) {
        encoding = "utf-8";
    }
    if (errors == NULL) {
        errors = "strict";
    }
    return PyUnicode_AsEncodedString(self, encoding, errors);
}

str.format field parsing

// CPython: Objects/unicodeobject.c:660 unicode_format
/*
 * Parse one replacement field of a format string:
 *
 *     '{field_name!conversion:format_spec}'
 *
 *   field_name  = auto (empty), index (0, 1, ...), or name
 *   conversion  = 'r' (repr), 's' (str), 'a' (ascii)
 *   format_spec = format mini-language
 *
 * input is the text being parsed.  field_name, format_spec and conversion
 * are presumably out-parameters that receive the three parts -- the body
 * is elided in this excerpt, so confirm against the full source.
 *
 * NOTE(review): the meaning of the int return value is not visible here.
 * NOTE(review): upstream CPython appears to define parse_field in
 * Objects/stringlib/unicode_format.h -- confirm the file/line citation.
 */
static int
parse_field(SubString *input, SubString *field_name, SubString *format_spec,
Py_UCS4 *conversion)
{
/* Find the closing '}' of the field.  A format_spec may itself contain
   nested '{...}' groups, so the first '}' seen is not necessarily the one
   that ends the field. */
...
}

'{0!r:>10}'.format('hello') → "   'hello'": apply repr (which yields the 7-character string 'hello' with its quotes), then right-align in 10 chars.

_PyUnicode_FormatLong

// CPython: Objects/unicodeobject.c:1080 _PyUnicode_FormatLong
/*
 * Format the integer object val according to a presentation type code.
 *
 *   val   object to convert
 *   alt   alternate-form flag ('#'): controls the 0x / 0o / 0b prefix
 *   prec  precision: minimum digit count
 *   type  presentation code, one of 'd', 'x', 'X', 'o', 'b'
 *
 * Only the base-conversion switch is shown; the steps that apply alt and
 * prec and return the result are elided in this excerpt.
 *
 * NOTE(review): result is never declared in the visible code -- presumably
 * a PyObject * local that was dropped from the excerpt.
 * NOTE(review): the conversions below can presumably fail; no NULL check on
 * result is visible, so confirm the elided part handles it.
 */
static PyObject *
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
{
/* type: 'd' decimal, 'x' hex lowercase, 'X' hex upper, 'o' octal, 'b' binary */
/* NOTE(review): there is no default case, so an unrecognized type code
   falls through the switch without assigning result. */
switch (type) {
case 'd': result = PyObject_Str(val); break;
case 'x': result = PyNumber_ToBase(val, 16); break;
case 'X': /* hex uppercase -- NOTE(review): conversion elided here; result is not assigned on this path */; break;
case 'o': result = PyNumber_ToBase(val, 8); break;
case 'b': result = PyNumber_ToBase(val, 2); break;
}
/* Apply alt flag: '#' prefix (0x, 0o, 0b) */
/* Apply precision: minimum digit count */
...
}

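'{:#010x}'.format(255) → '0x000000ff': the # flag adds the 0x prefix, and 010 zero-pads to a total width of 10 characters, prefix included.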

gopy notes

startswith/endswith use strings.HasPrefix/HasSuffix for single values and loop for tuples. str.encode calls codecs.Encode(s, encoding, errors). str.format is parsed by objects/strformat.go. Integer format types use strconv.FormatInt with base 2/8/10/16.