Skip to main content

Python/bootstrap_hash.c

Source:

cpython 3.14 @ ab2d84fe1023/Python/bootstrap_hash.c

bootstrap_hash.c initializes the per-process hash secret used by str.__hash__, bytes.__hash__, and datetime.__hash__. It runs before the interpreter is fully initialized, so it cannot allocate Python objects.

Map

| Lines | Symbol | Role |
| --- | --- | --- |
| 1-80 | _Py_HashSecret_t | Union: two 64-bit halves or a fnv pair |
| 81-200 | _Py_InitializeHashRandomization | Read entropy from OS, set _Py_HashSecret |
| 201-300 | lcg_urandom | Fallback PRNG when OS entropy is unavailable |
| 301-400 | py_getrandom | Wrapper for getrandom(2) / /dev/urandom |
| 401-450 | PYTHONHASHSEED | Env var to set a fixed seed (for reproducibility) |

Reading

_Py_HashSecret_t

// CPython: Python/bootstrap_hash.c:48 _Py_HashSecret_t
/*
 * Process-wide hash secret. All members share the same storage, so the
 * secret can be read as SipHash keys, as an FNV prefix/suffix pair, or as
 * raw bytes.
 */
typedef union {
/* secret for SipHash: two 64-bit key words */
struct {
uint64_t k0;
uint64_t k1;
} siphash;
/* secret for FNV (fallback): two Py_hash_t words */
struct {
Py_hash_t prefix;
Py_hash_t suffix;
} fnv;
/* raw byte view of the secret; 16 bytes matches the two 64-bit SipHash
 * words above.
 * NOTE(review): presumably the FNV pair is no larger than 16 bytes on all
 * supported platforms; confirm against the definition of Py_hash_t. */
unsigned char bytes[16];
} _Py_HashSecret_t;

extern _Py_HashSecret_t _Py_HashSecret;

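_Py_HashSecret is a global that is initialized once at startup. Its value differs on every process run unless PYTHONHASHSEED is set to a fixed integer; when the variable is unset or set to "random", the secret is drawn from OS entropy.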

_Py_InitializeHashRandomization

// CPython: Python/bootstrap_hash.c:130 _Py_InitializeHashRandomization
/*
 * Initialize the process-wide hash secret, _Py_HashSecret.
 *
 * The PYTHONHASHSEED environment variable selects the mode:
 *   - unset or "random": fill the secret from OS entropy via py_getrandom(),
 *     falling back to lcg_urandom() seeded from the clock if that fails;
 *   - "0": zero the secret, which disables hash randomization;
 *   - any other decimal integer up to UINT32_MAX: expand it with
 *     lcg_urandom() so the secret is reproducible across runs.
 *
 * A value that is not a plain decimal integer in that range is ignored and
 * handled like "random", so a typo can never silently disable
 * randomization or be truncated to a different seed.
 */
void
_Py_InitializeHashRandomization(void)
{
    const char *seed_str = Py_GETENV("PYTHONHASHSEED");

    if (seed_str != NULL && strcmp(seed_str, "random") != 0) {
        char *end = NULL;
        unsigned long seed;
        int valid;

        /* strtoul reports overflow only through errno, so clear it first. */
        errno = 0;
        seed = strtoul(seed_str, &end, 10);
        valid = (end != seed_str          /* at least one digit consumed */
                 && *end == '\0'          /* no trailing garbage */
                 && errno != ERANGE       /* did not overflow unsigned long */
                 && seed <= UINT32_MAX);  /* fits the 32-bit seed range */

        if (valid) {
            if (seed == 0) {
                /* PYTHONHASHSEED=0: an all-zero secret disables hash
                 * randomization. Running the LCG from seed 0 would
                 * produce non-zero bytes instead. */
                memset(_Py_HashSecret.bytes, 0, sizeof(_Py_HashSecret));
            }
            else {
                /* Fixed non-zero seed: deterministic secret derived from
                 * the seed. */
                lcg_urandom((uint32_t)seed, _Py_HashSecret.bytes,
                            sizeof(_Py_HashSecret));
            }
            return;
        }
        /* Malformed value: fall through and seed randomly. */
    }

    /* Random seed: fill from OS entropy (blocking request). */
    if (py_getrandom(_Py_HashSecret.bytes, sizeof(_Py_HashSecret), 1) < 0) {
        /* Fallback: no OS entropy available, use the LCG seeded from the
         * clock. */
        lcg_urandom((uint32_t)time(NULL), _Py_HashSecret.bytes,
                    sizeof(_Py_HashSecret));
    }
}

py_getrandom

// CPython: Python/bootstrap_hash.c:260 py_getrandom
/*
 * Fill `buffer` with `size` random bytes obtained from the operating system.
 *
 * Returns 1 once the buffer has been filled. The Windows path returns -1
 * when BCryptGenRandom reports a failure. `blocking` only affects the
 * getrandom syscall: non-zero passes flags 0, zero passes GRND_NONBLOCK.
 *
 * NOTE(review): the /dev/urandom path is elided in this excerpt, so its
 * return values and error handling are not shown here.
 */
static int
py_getrandom(void *buffer, Py_ssize_t size, int blocking)
{
#if defined(HAVE_GETRANDOM_SYSCALL)
/* Try the getrandom syscall first when the build provides it. */
long n = syscall(SYS_getrandom, buffer, size,
blocking ? 0 : GRND_NONBLOCK);
/* Only a full-length read counts as success. */
if (n == size) return 1;
/* No return here: a short read or any error (ENOSYS included) continues
 * to the platform fallback below. */
if (n < 0 && errno == ENOSYS) { /* kernel too old */ }
#endif
#ifdef MS_WINDOWS
/* BCryptGenRandom returns a status code; any non-zero value is treated as
 * failure. */
if (BCryptGenRandom(NULL, buffer, size, BCRYPT_USE_SYSTEM_PREFERRED_RNG))
return -1;
return 1;
#else
/* /dev/urandom fallback */
int fd = _Py_open_noraise("/dev/urandom", O_RDONLY);
...
#endif
}

PYTHONHASHSEED

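PYTHONHASHSEED=0 sets the hash secret to all zeros, disabling randomization. This produces deterministic hashes across runs, which is useful for debugging. Setting it to any other integer (1 to 4294967295) seeds lcg_urandom from that integer, so the secret is still reproducible but not all-zero. Leaving the variable unset, or setting it to "random", draws the secret from OS entropy.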

gopy notes

gopy initializes _Py_HashSecret in vm/hash_secret.go using crypto/rand.Read. PYTHONHASHSEED is checked in vm.InitHashRandomization(). The SipHash-1-3 implementation is in vm/siphash.go, matching CPython's Python/pyhash.c. When PYTHONHASHSEED=0 is set, the secret bytes are all-zero and hash results match CPython.