Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions doc/benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,32 @@ These benchmarks also have an "eager" flavor that uses asyncio eager task factor
if available.


base64
------

Benchmark the ``base64`` module's encoding and decoding functions. Most
algorithms have ``_small`` and ``_large`` variants (URL-safe Base64 is
``_small`` only), and each benchmark tests both encoding and decoding:

* ``_small``: Balanced iterations across tiny (20B), small (127B), medium (3KB),
and 9KB data sizes
* ``_large``: Large data focus with 100KB x 10 iterations plus 1MB x 1 iteration

Available benchmarks:

* ``base64_small``, ``base64_large``: Standard Base64 encoding and decoding
(includes ``validate=True`` code path)
* ``urlsafe_base64_small``: URL-safe Base64 (small only, as URLs shouldn't
contain huge data)
* ``base32_small``, ``base32_large``: Base32 encoding and decoding
* ``base16_small``, ``base16_large``: Base16/hex encoding and decoding
* ``ascii85_small``, ``ascii85_large``: Ascii85 encoding and decoding
(includes ``wrapcol=76`` code path)
* ``base85_small``, ``base85_large``: Base85 encoding and decoding

See the `base64 module <https://docs.python.org/dev/library/base64.html>`_.


chameleon
---------

Expand Down
4 changes: 4 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

Version 1.14.0
--------------
* Add base64 module benchmark (b64, b32, b16, a85, b85)

Version 1.13.0 (2025-10-27)
---------------------------
* Re-enable xdsl benchmark
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
# built documents.
#
# The short X.Y version.
version = release = "1.0.6"
version = release = "1.14.0"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion pyperformance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
from importlib.metadata import distribution

VERSION = (1, 13, 0)
VERSION = (1, 14, 0)
__version__ = ".".join(map(str, VERSION))


Expand Down
1 change: 1 addition & 0 deletions pyperformance/data-files/benchmarks/MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ async_tree_eager_memoization_tg <local:async_tree>
asyncio_tcp <local>
asyncio_tcp_ssl <local:asyncio_tcp>
asyncio_websockets <local>
base64 <local>
bpe_tokeniser <local>
concurrent_imap <local>
coroutines <local>
Expand Down
10 changes: 10 additions & 0 deletions pyperformance/data-files/benchmarks/bm_base64/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[project]
name = "pyperformance_bm_base64"
requires-python = ">=3.8"
dependencies = ["pyperf"]
urls = {repository = "https://github.com/python/pyperformance"}
dynamic = ["version"]

[tool.pyperformance]
name = "base64"
tags = "serialize"
274 changes: 274 additions & 0 deletions pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
"""Benchmark for the base64 module's primary public APIs.

Tests encoding and decoding performance across various variants
and data sizes, split into _small (balanced small data) and _large variants.
"""

import base64
import random
import pyperf


# Generate test data with a fixed seed for reproducibility.
random.seed(12345)


def _random_bytes(size):
    """Return ``size`` pseudo-random bytes from the seeded module generator.

    This is the same computation as ``random.randbytes(size)`` (added in
    Python 3.9), spelled with ``getrandbits`` so the benchmark still imports
    on Python 3.8.  One bulk draw per payload replaces a Python-level
    ``randrange(256)`` call per byte, which keeps the import-time setup of
    the larger payloads (100 KiB and 1 MiB) cheap.
    """
    return random.getrandbits(size * 8).to_bytes(size, "little")


DATA_TINY = _random_bytes(20)
DATA_SMALL = _random_bytes(127)  # odd on purpose
DATA_MEDIUM = _random_bytes(3072)
DATA_9K = _random_bytes(9000)
DATA_LARGE = _random_bytes(102400)
DATA_HUGE = _random_bytes(1048576)

# Pre-encoded data for decode benchmarks, so that the timed loops only pay
# for the decode call itself.
B64_TINY = base64.b64encode(DATA_TINY)
B64_SMALL = base64.b64encode(DATA_SMALL)
B64_MEDIUM = base64.b64encode(DATA_MEDIUM)
B64_9K = base64.b64encode(DATA_9K)
B64_LARGE = base64.b64encode(DATA_LARGE)
B64_HUGE = base64.b64encode(DATA_HUGE)

# URL-safe Base64 is benchmarked on the small payloads only.
B64_URLSAFE_TINY = base64.urlsafe_b64encode(DATA_TINY)
B64_URLSAFE_SMALL = base64.urlsafe_b64encode(DATA_SMALL)
B64_URLSAFE_MEDIUM = base64.urlsafe_b64encode(DATA_MEDIUM)
B64_URLSAFE_9K = base64.urlsafe_b64encode(DATA_9K)

B32_TINY = base64.b32encode(DATA_TINY)
B32_SMALL = base64.b32encode(DATA_SMALL)
B32_MEDIUM = base64.b32encode(DATA_MEDIUM)
B32_9K = base64.b32encode(DATA_9K)
B32_LARGE = base64.b32encode(DATA_LARGE)
B32_HUGE = base64.b32encode(DATA_HUGE)

B16_TINY = base64.b16encode(DATA_TINY)
B16_SMALL = base64.b16encode(DATA_SMALL)
B16_MEDIUM = base64.b16encode(DATA_MEDIUM)
B16_9K = base64.b16encode(DATA_9K)
B16_LARGE = base64.b16encode(DATA_LARGE)
B16_HUGE = base64.b16encode(DATA_HUGE)

A85_TINY = base64.a85encode(DATA_TINY)
A85_SMALL = base64.a85encode(DATA_SMALL)
A85_MEDIUM = base64.a85encode(DATA_MEDIUM)
A85_9K = base64.a85encode(DATA_9K)
A85_LARGE = base64.a85encode(DATA_LARGE)
A85_HUGE = base64.a85encode(DATA_HUGE)

B85_TINY = base64.b85encode(DATA_TINY)
B85_SMALL = base64.b85encode(DATA_SMALL)
B85_MEDIUM = base64.b85encode(DATA_MEDIUM)
B85_9K = base64.b85encode(DATA_9K)
B85_LARGE = base64.b85encode(DATA_LARGE)
B85_HUGE = base64.b85encode(DATA_HUGE)


# --- Base64 (includes validate=True) ---

def bench_b64_small(loops):
    """Time standard Base64 round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a ``b64encode``, a plain ``b64decode`` and a
    ``b64decode(..., validate=True)``.

    NOTE(review): with two decodes for every encode, decoding carries twice
    the weight of encoding in the reported time.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.b64encode(DATA_TINY)
            base64.b64decode(B64_TINY)
            base64.b64decode(B64_TINY, validate=True)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.b64encode(DATA_SMALL)
            base64.b64decode(B64_SMALL)
            base64.b64decode(B64_SMALL, validate=True)
        # Medium payload.
        for _pass in range(3):
            base64.b64encode(DATA_MEDIUM)
            base64.b64decode(B64_MEDIUM)
            base64.b64decode(B64_MEDIUM, validate=True)
        # 9000-byte payload, once per outer iteration.
        base64.b64encode(DATA_9K)
        base64.b64decode(B64_9K)
        base64.b64decode(B64_9K, validate=True)
    return pyperf.perf_counter() - started


def bench_b64_large(loops):
    """Time standard Base64 round trips over the large payloads.

    Each outer iteration runs 10 passes over DATA_LARGE followed by a single
    pass over DATA_HUGE.  One pass is a ``b64encode``, a plain ``b64decode``
    and a ``b64decode(..., validate=True)``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # 102400-byte payload, ten passes.
        for _pass in range(10):
            base64.b64encode(DATA_LARGE)
            base64.b64decode(B64_LARGE)
            base64.b64decode(B64_LARGE, validate=True)
        # 1048576-byte payload, once per outer iteration.
        base64.b64encode(DATA_HUGE)
        base64.b64decode(B64_HUGE)
        base64.b64decode(B64_HUGE, validate=True)
    return pyperf.perf_counter() - started


# --- URL-safe Base64 (small only) ---

def bench_urlsafe_b64_small(loops):
    """Time URL-safe Base64 round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a ``urlsafe_b64encode`` followed by a ``urlsafe_b64decode``.

    NOTE(review): only the ``urlsafe_*`` helpers are exercised here; other
    alphabets selected through ``altchars=`` are not covered.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.urlsafe_b64encode(DATA_TINY)
            base64.urlsafe_b64decode(B64_URLSAFE_TINY)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.urlsafe_b64encode(DATA_SMALL)
            base64.urlsafe_b64decode(B64_URLSAFE_SMALL)
        # Medium payload.
        for _pass in range(3):
            base64.urlsafe_b64encode(DATA_MEDIUM)
            base64.urlsafe_b64decode(B64_URLSAFE_MEDIUM)
        # 9000-byte payload, once per outer iteration.
        base64.urlsafe_b64encode(DATA_9K)
        base64.urlsafe_b64decode(B64_URLSAFE_9K)
    return pyperf.perf_counter() - started


# --- Base32 ---

def bench_b32_small(loops):
    """Time Base32 round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a ``b32encode`` followed by a ``b32decode``.

    NOTE(review): only ``b32encode``/``b32decode`` with default arguments
    are exercised; other Base32 variants are not covered.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.b32encode(DATA_TINY)
            base64.b32decode(B32_TINY)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.b32encode(DATA_SMALL)
            base64.b32decode(B32_SMALL)
        # Medium payload.
        for _pass in range(3):
            base64.b32encode(DATA_MEDIUM)
            base64.b32decode(B32_MEDIUM)
        # 9000-byte payload, once per outer iteration.
        base64.b32encode(DATA_9K)
        base64.b32decode(B32_9K)
    return pyperf.perf_counter() - started


def bench_b32_large(loops):
    """Time Base32 round trips over the large payloads.

    Each outer iteration runs 10 passes over DATA_LARGE followed by a single
    pass over DATA_HUGE.  One pass is a ``b32encode`` followed by a
    ``b32decode``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # 102400-byte payload, ten passes.
        for _pass in range(10):
            base64.b32encode(DATA_LARGE)
            base64.b32decode(B32_LARGE)
        # 1048576-byte payload, once per outer iteration.
        base64.b32encode(DATA_HUGE)
        base64.b32decode(B32_HUGE)
    return pyperf.perf_counter() - started


# --- Base16 ---

def bench_b16_small(loops):
    """Time Base16 (hex) round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a ``b16encode`` followed by a ``b16decode``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.b16encode(DATA_TINY)
            base64.b16decode(B16_TINY)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.b16encode(DATA_SMALL)
            base64.b16decode(B16_SMALL)
        # Medium payload.
        for _pass in range(3):
            base64.b16encode(DATA_MEDIUM)
            base64.b16decode(B16_MEDIUM)
        # 9000-byte payload, once per outer iteration.
        base64.b16encode(DATA_9K)
        base64.b16decode(B16_9K)
    return pyperf.perf_counter() - started


def bench_b16_large(loops):
    """Time Base16 (hex) round trips over the large payloads.

    Each outer iteration runs 10 passes over DATA_LARGE followed by a single
    pass over DATA_HUGE.  One pass is a ``b16encode`` followed by a
    ``b16decode``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # 102400-byte payload, ten passes.
        for _pass in range(10):
            base64.b16encode(DATA_LARGE)
            base64.b16decode(B16_LARGE)
        # 1048576-byte payload, once per outer iteration.
        base64.b16encode(DATA_HUGE)
        base64.b16decode(B16_HUGE)
    return pyperf.perf_counter() - started


# --- Ascii85 (includes wrapcol=76) ---

def bench_a85_small(loops):
    """Time Ascii85 round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a plain ``a85encode``, an ``a85encode(..., wrapcol=76)`` and
    an ``a85decode`` of the unwrapped encoding, so encoding is called twice
    for every decode.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.a85encode(DATA_TINY)
            base64.a85encode(DATA_TINY, wrapcol=76)
            base64.a85decode(A85_TINY)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.a85encode(DATA_SMALL)
            base64.a85encode(DATA_SMALL, wrapcol=76)
            base64.a85decode(A85_SMALL)
        # Medium payload.
        for _pass in range(3):
            base64.a85encode(DATA_MEDIUM)
            base64.a85encode(DATA_MEDIUM, wrapcol=76)
            base64.a85decode(A85_MEDIUM)
        # 9000-byte payload, once per outer iteration.
        base64.a85encode(DATA_9K)
        base64.a85encode(DATA_9K, wrapcol=76)
        base64.a85decode(A85_9K)
    return pyperf.perf_counter() - started


def bench_a85_large(loops):
    """Time Ascii85 round trips over the large payloads.

    Each outer iteration runs 10 passes over DATA_LARGE followed by a single
    pass over DATA_HUGE.  One pass is a plain ``a85encode``, an
    ``a85encode(..., wrapcol=76)`` and an ``a85decode`` of the unwrapped
    encoding.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # 102400-byte payload, ten passes.
        for _pass in range(10):
            base64.a85encode(DATA_LARGE)
            base64.a85encode(DATA_LARGE, wrapcol=76)
            base64.a85decode(A85_LARGE)
        # 1048576-byte payload, once per outer iteration.
        base64.a85encode(DATA_HUGE)
        base64.a85encode(DATA_HUGE, wrapcol=76)
        base64.a85decode(A85_HUGE)
    return pyperf.perf_counter() - started


# --- Base85 ---

def bench_b85_small(loops):
    """Time Base85 round trips over the small payload mix.

    Each outer iteration runs 450 passes over DATA_TINY, 71 passes over
    DATA_SMALL, 3 passes over DATA_MEDIUM and a single pass over DATA_9K.
    One pass is a ``b85encode`` followed by a ``b85decode``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # Tiny payload: dominated by per-call overhead.
        for _pass in range(450):
            base64.b85encode(DATA_TINY)
            base64.b85decode(B85_TINY)
        # Small payload with an odd length.
        for _pass in range(71):
            base64.b85encode(DATA_SMALL)
            base64.b85decode(B85_SMALL)
        # Medium payload.
        for _pass in range(3):
            base64.b85encode(DATA_MEDIUM)
            base64.b85decode(B85_MEDIUM)
        # 9000-byte payload, once per outer iteration.
        base64.b85encode(DATA_9K)
        base64.b85decode(B85_9K)
    return pyperf.perf_counter() - started


def bench_b85_large(loops):
    """Time Base85 round trips over the large payloads.

    Each outer iteration runs 10 passes over DATA_LARGE followed by a single
    pass over DATA_HUGE.  One pass is a ``b85encode`` followed by a
    ``b85decode``.

    ``loops`` is the number of outer iterations; the return value is the
    difference between two ``pyperf.perf_counter()`` readings taken around
    the whole run.
    """
    outer = range(loops)
    started = pyperf.perf_counter()
    for _ in outer:
        # 102400-byte payload, ten passes.
        for _pass in range(10):
            base64.b85encode(DATA_LARGE)
            base64.b85decode(B85_LARGE)
        # 1048576-byte payload, once per outer iteration.
        base64.b85encode(DATA_HUGE)
        base64.b85decode(B85_HUGE)
    return pyperf.perf_counter() - started


if __name__ == "__main__":
    runner = pyperf.Runner()
    runner.metadata['description'] = "Benchmark base64 module encoding/decoding"

    # (benchmark name, time function) pairs, registered in exactly this order.
    # URL-safe Base64 has no large variant.
    benchmarks = (
        ('base64_small', bench_b64_small),
        ('base64_large', bench_b64_large),
        ('urlsafe_base64_small', bench_urlsafe_b64_small),
        ('base32_small', bench_b32_small),
        ('base32_large', bench_b32_large),
        ('base16_small', bench_b16_small),
        ('base16_large', bench_b16_large),
        ('ascii85_small', bench_a85_small),
        ('ascii85_large', bench_a85_large),
        ('base85_small', bench_b85_small),
        ('base85_large', bench_b85_large),
    )
    for bench_name, time_func in benchmarks:
        runner.bench_time_func(bench_name, time_func)
Loading