From c946f5cdd5424373b2c5a49b489344c94f297608 Mon Sep 17 00:00:00 2001 From: rgarcia <72655+rgarcia@users.noreply.github.com> Date: Sat, 28 Mar 2026 20:08:57 +0000 Subject: [PATCH] Add rate limiting documentation to API reference Documents the 429 response behavior including Retry-After, X-RateLimit-Limit, and X-RateLimit-Remaining headers, plan-based rate limits, and retry guidance. Co-Authored-By: Claude Opus 4.6 --- docs.json | 1 + info/rate-limiting.mdx | 78 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 info/rate-limiting.mdx diff --git a/docs.json b/docs.json index 8e8fd33..5a62ae5 100644 --- a/docs.json +++ b/docs.json @@ -210,6 +210,7 @@ "browsers/faq", "info/concepts", "info/pricing", + "info/rate-limiting", "info/support", "info/unikernels" ] diff --git a/info/rate-limiting.mdx b/info/rate-limiting.mdx new file mode 100644 index 0000000..d9691d5 --- /dev/null +++ b/info/rate-limiting.mdx @@ -0,0 +1,78 @@ +--- +title: "Rate Limiting" +--- + +Kernel enforces rate limits on API requests to ensure fair usage and platform stability. When you exceed the rate limit for an endpoint, the API returns a `429 Too Many Requests` response. + +## Rate limits by plan + +Rate limits are applied per organization, per endpoint. Limits are expressed in requests per minute (RPM): + +| Plan | Requests per minute | +| --- | --- | +| Developer (free) | 10 | +| Hobbyist | 25 | +| Start-Up | 100 | +| Enterprise | 250 | + +Organizations on a trial use Start-Up rate limits regardless of the selected plan. + +## Rate-limited endpoints + +The following endpoints enforce rate limits: + +| Endpoint | Method | +| --- | --- | +| `/browsers` | `POST` | + +Additional endpoints (`POST /browser-pools`, `PUT /browser-pools/:id`, `POST /invocations`) have rate-limiting infrastructure in place, and limits on them may be enforced in the future. 
+ +## Response headers + +When a rate-limited endpoint is called, the API includes these headers in the response: + +| Header | Description | Example | +| --- | --- | --- | +| `X-RateLimit-Limit` | Maximum number of requests allowed (burst capacity) | `100` | +| `X-RateLimit-Remaining` | Number of requests currently remaining (tokens left in the bucket) | `47` | + +These headers are included on both successful and rate-limited responses for rate-limited endpoints. + +When a request is rejected, the response also includes: + +| Header | Description | Example | +| --- | --- | --- | +| `Retry-After` | Seconds to wait before retrying | `3` | + +## How Retry-After is calculated + +Kernel uses a token bucket algorithm for rate limiting. Each organization gets a bucket for each rate-limited endpoint, with capacity equal to the RPM limit. The bucket refills at a steady rate of `capacity / 60` tokens per second (for example, a 100 RPM limit refills about 1.67 tokens per second). + +The `Retry-After` value is the number of seconds until enough tokens have refilled to allow the request, with a minimum of 1 second. + +## Example rate-limited response + +``` +HTTP/1.1 429 Too Many Requests +Content-Type: application/json +Retry-After: 3 +X-RateLimit-Limit: 100 +X-RateLimit-Remaining: 0 + +{ + "code": "rate_limit_exceeded", + "message": "Rate limit exceeded. Please retry later." +} +``` + +## Handling rate limits + +When you receive a `429` response: + +1. Read the `Retry-After` header to determine how long to wait. +2. Wait for the specified number of seconds. +3. Retry the request. + +For sustained workloads, use the `X-RateLimit-Remaining` header to proactively throttle requests before hitting the limit. + +If you need higher rate limits, contact us about the Enterprise plan or request a custom override.