Skip to main content

Modules/_lzmamodule.c

Source:

cpython 3.14 @ ab2d84fe1023/Modules/_lzmamodule.c

_lzma wraps liblzma (part of xz-utils) for LZMA2/XZ compression. The pure Python lzma module (Lib/lzma.py) adds the LZMAFile class on top.

Map

| Lines | Symbol | Role |
|---|---|---|
| 1-100 | Format constants | FORMAT_XZ, FORMAT_ALONE, FORMAT_RAW |
| 101-250 | LZMACompressor | Streaming compressor: compress/flush |
| 251-500 | LZMADecompressor | Streaming decompressor: decompress |
| 501-700 | lzma_compress | One-shot compress |
| 701-900 | lzma_decompress | One-shot decompress |
| 901-1200 | Filter encoding | LZMA2 preset, delta, branch/call/jump filters |

Reading

Format selection

// CPython: Modules/_lzmamodule.c:48 format constants
/* FORMAT_XZ = 1: .xz container (most common, supports stream checksums) */
/* FORMAT_ALONE = 2: .lzma legacy format (single-stream, no magic) */
/* FORMAT_RAW = 3: raw LZMA/LZMA2 stream (no container, caller provides filter spec) */
/* FORMAT_AUTO = 0: autodetect by magic number (for decompression only) */

LZMACompressor

// CPython: Modules/_lzmamodule.c:160 _lzma_LZMACompressor_impl
/*
 * Abridged excerpt: initialise the compressor's lzma_stream with the liblzma
 * encoder that matches `format`:
 *   FORMAT_XZ        -> lzma_easy_encoder(lzs, preset, check)
 *   FORMAT_ALONE     -> lzma_alone_encoder(lzs, &options)
 *   any other value  -> lzma_raw_encoder(lzs, raw_filters)
 *
 * NOTE(review): `self`, `options` and `raw_filters` are not declared in the
 * lines shown here, and neither the use of `filters` nor the handling of
 * `ret` is visible. Consult the referenced CPython source for the complete
 * function before relying on this listing.
 */
static PyObject *
_lzma_LZMACompressor_impl(PyTypeObject *type, int format, int check,
uint32_t preset, PyObject *filters)
{
lzma_stream *lzs = &self->lzs;
lzma_ret ret;
/* Pick the encoder initialiser for the requested container format. */
if (format == FORMAT_XZ)
ret = lzma_easy_encoder(lzs, preset, check);
else if (format == FORMAT_ALONE)
ret = lzma_alone_encoder(lzs, &options);
else
ret = lzma_raw_encoder(lzs, raw_filters);
/* Remainder of the function is elided in this excerpt. */
...
}

/*
 * Abridged excerpt: feed the bytes in `data` to the encoder held in
 * self->lzs and return `result`.
 *
 * The input window (next_in / avail_in) is pointed at the caller's buffer,
 * then lzma_code() is called with LZMA_RUN until avail_in drops to zero.
 *
 * NOTE(review): `initial_size` is not declared in the lines shown, and the
 * code that checks `r` and moves encoder output into `result` is elided.
 */
static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
{
/* Point the stream's input window at the caller-supplied buffer. */
self->lzs.avail_in = data->len;
self->lzs.next_in = data->buf;
PyObject *result = grow_buffer(initial_size);
/* do/while: lzma_code() runs at least once, even when data->len is 0. */
do {
lzma_ret r = lzma_code(&self->lzs, LZMA_RUN);
/* Loop body (status check, output handling) is elided in this excerpt. */
...
} while (self->lzs.avail_in > 0);
return result;
}

LZMADecompressor

// CPython: Modules/_lzmamodule.c:480 _lzma_LZMADecompressor_decompress_impl
/*
 * Abridged excerpt: feed the bytes in `data` to the decoder held in
 * self->lzs and return `result`.
 *
 * lzma_code() is called with LZMA_RUN until the input is consumed. When it
 * reports LZMA_STREAM_END, the decompressor is marked as finished (eof = 1)
 * and whatever input remains unread is copied into self->unused_data.
 *
 * NOTE(review): `result` is not declared in the lines shown, `max_length`
 * is not used in the visible code, and the rest of the loop body is elided.
 * The visible code also does not check the PyBytes_FromStringAndSize()
 * return value or release a previous unused_data -- confirm against the
 * referenced CPython source.
 */
static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor *self,
Py_buffer *data, Py_ssize_t max_length)
{
/* Point the stream's input window at the caller-supplied buffer. */
self->lzs.avail_in = data->len;
self->lzs.next_in = data->buf;
/* do/while: lzma_code() runs at least once, even when data->len is 0. */
do {
lzma_ret r = lzma_code(&self->lzs, LZMA_RUN);
if (r == LZMA_STREAM_END) {
self->eof = 1;
/* Any bytes after the end: store in unused_data */
self->unused_data = PyBytes_FromStringAndSize(
(char *)self->lzs.next_in, self->lzs.avail_in);
break;
}
/* Remaining loop body is elided in this excerpt. */
...
} while (self->lzs.avail_in > 0);
return result;
}

Filter chain

// CPython: Modules/_lzmamodule.c:940 encode_filter_spec
/* Filters can be chained: [delta_filter, lzma2_filter] */
/* Common presets: preset=6 (default) = LZMA2 with 8 MiB dict */
/* Custom: filters=[{'id': lzma.FILTER_LZMA2, 'preset': 6 | lzma.PRESET_EXTREME}] (PRESET_EXTREME is a flag OR-ed with a preset level 0-9) */

gopy notes

The lzma module lives in module/lzma/ and is built on Go's github.com/ulikunitz/xz package. FORMAT_XZ uses xz.NewWriter/xz.NewReader. FORMAT_ALONE uses the legacy lzma2 reader. FORMAT_RAW uses raw LZMA2 streams. The preset level maps to the DictCap and LC/LP/PB parameters.