diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md index 51fa6d3ca1c6..9a3ba4ec6df0 100644 --- a/docs/my-website/docs/proxy/reliability.md +++ b/docs/my-website/docs/proxy/reliability.md @@ -2,7 +2,7 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 🔥 Load Balancing, Fallbacks, Retries, Timeouts +# Fallbacks, Load Balancing, Retries - Quick Start [load balancing](#test---load-balancing) - Quick Start [client side fallbacks](#test---client-side-fallbacks) diff --git a/docs/my-website/docs/proxy/response_headers.md b/docs/my-website/docs/proxy/response_headers.md new file mode 100644 index 000000000000..c066df1e0291 --- /dev/null +++ b/docs/my-website/docs/proxy/response_headers.md @@ -0,0 +1,24 @@ +# Rate Limit Headers + +When you make a request to the proxy, the proxy will return the following [OpenAI-compatible headers](https://platform.openai.com/docs/guides/rate-limits/rate-limits-in-headers): + +- `x-ratelimit-remaining-requests` - Optional[int]: The remaining number of requests that are permitted before exhausting the rate limit. +- `x-ratelimit-remaining-tokens` - Optional[int]: The remaining number of tokens that are permitted before exhausting the rate limit. +- `x-ratelimit-limit-requests` - Optional[int]: The maximum number of requests that are permitted before exhausting the rate limit. +- `x-ratelimit-limit-tokens` - Optional[int]: The maximum number of tokens that are permitted before exhausting the rate limit. +- `x-ratelimit-reset-requests` - Optional[int]: The time at which the request rate limit will reset. +- `x-ratelimit-reset-tokens` - Optional[int]: The time at which the token rate limit will reset. + +These headers are useful for clients to understand the current rate limit status and adjust their request rate accordingly. + +## How are these headers calculated? 
+ +**If the key has rate limits set** + +The proxy will return the [remaining rate limits for that key](https://github.com/BerriAI/litellm/blob/bfa95538190575f7f317db2d9598fc9a82275492/litellm/proxy/hooks/parallel_request_limiter.py#L778). + +**If the key does not have rate limits set** + +The proxy returns the remaining requests/tokens reported by the backend provider. + +If the backend provider does not return these headers, their values will be `None`. diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 310bb02b7cbe..c3c5bd31588e 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -46,6 +46,7 @@ const sidebars = { "proxy/enterprise", "proxy/user_keys", "proxy/configs", + "proxy/response_headers", "proxy/reliability", { type: "category",