From 32431901e8696a12df8dfb9cd16f2bbcf0d74a8c Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 18:57:48 +0800 Subject: [PATCH 1/9] feat(backup_request): add rate-limited backup request policy (#3228) --- docs/cn/backup_request.md | 54 ++++++++++- docs/en/backup_request.md | 52 ++++++++++ src/brpc/backup_request_policy.cpp | 150 +++++++++++++++++++++++++++++ src/brpc/backup_request_policy.h | 33 +++++++ src/brpc/channel.cpp | 1 + src/brpc/channel.h | 5 +- 6 files changed, 292 insertions(+), 3 deletions(-) create mode 100644 src/brpc/backup_request_policy.cpp diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index 6674fbf429..6b3344c1f5 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -6,7 +6,7 @@ Channel开启backup request。这个Channel会先向其中一个server发送请 示例代码见[example/backup_request_c++](https://github.com/apache/brpc/blob/master/example/backup_request_c++)。这个例子中,client设定了在2ms后发送backup request,server在碰到偶数位的请求后会故意睡眠20ms以触发backup request。 -运行后,client端和server端的日志分别如下,“index”是请求的编号。可以看到server端在收到第一个请求后会故意sleep 20ms,client端之后发送另一个同样index的请求,最终的延时并没有受到故意sleep的影响。 +运行后,client端和server端的日志分别如下,"index"是请求的编号。可以看到server端在收到第一个请求后会故意sleep 20ms,client端之后发送另一个同样index的请求,最终的延时并没有受到故意sleep的影响。 ![img](../images/backup_request_1.png) @@ -39,6 +39,58 @@ my_func_latency << tm.u_elapsed(); // u代表微秒,还有s_elapsed(), m_elap // 好了,在/vars中会显示my_func_qps, my_func_latency, my_func_latency_cdf等很多计数器。 ``` +## Backup Request 限流 + +如需限制 backup request 的发送比例,可实现 `BackupRequestPolicy` 接口或直接使用内置工厂函数。 + +### 使用自定义 BackupRequestPolicy + +如需完全控制,可实现 `BackupRequestPolicy` 接口并设置到 `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" + +class MyBackupPolicy : public brpc::BackupRequestPolicy { +public: + int32_t GetBackupRequestMs(const brpc::Controller*) const override { + return 10; // 10ms后发送backup + } + bool DoBackup(const brpc::Controller*) const override { + return should_allow_backup(); // 自定义逻辑 + } + void OnRPCEnd(const brpc::Controller*) override { + // 每次RPC结束时调用,可在此更新统计 + } +}; + +MyBackupPolicy my_policy; +brpc::ChannelOptions options; +options.backup_request_policy = &my_policy; // Channel不拥有该对象,需保证其生命周期长于Channel +channel.Init(..., &options); +``` + +完整优先级顺序:`backup_request_policy` > `backup_request_ms`。 + +如需使用内置限流逻辑但想自定义参数,也可直接调用工厂函数: + +```c++ +// 返回的指针由调用方负责释放。 +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; +opts.max_backup_ratio = 0.3; +opts.window_size_seconds = 10; +opts.update_interval_seconds = 5; +brpc::BackupRequestPolicy* policy = brpc::CreateRateLimitedBackupPolicy(opts); +options.backup_request_policy = policy; +// ... Channel销毁后记得delete policy +``` + +### 实现说明 + +- 比例通过bvar计数器在滑动时间窗口内统计。缓存值通过无锁CAS选举最多每 `update_interval_seconds` 刷新一次,因此每次RPC的开销极低(公共路径仅有两次原子读)。 +- Backup决策在做出时立即计数(RPC完成前),以便在延迟抖动期间更快地反馈。总RPC数在完成时统计。这意味着比例在抖动期间可能短暂滞后,这是设计有意为之——限流器的目标是近似的尽力而为的节流,而非精确执行。 +- 每个使用限流的Channel会维护两个 `bvar::Window` 采样任务,在Channel数量极多的部署中请留意此开销。 + # 当后端server不能挂在一个命名服务内时 【推荐】建立一个开启backup request的SelectiveChannel,其中包含两个sub channel。访问这个SelectiveChannel和上面的情况类似,会先访问一个sub channel,如果在ChannelOptions.backup_request_ms后没返回,再访问另一个sub channel。如果一个sub channel对应一个集群,这个方法就是在两个集群间做互备。SelectiveChannel的例子见[example/selective_echo_c++](https://github.com/apache/brpc/tree/master/example/selective_echo_c++),具体做法请参考上面的过程。 diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index 8e1a337c41..cdee4bc59d 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -39,6 +39,58 @@ my_func_latency << tm.u_elapsed(); // u represents for microsecond, and s_elaps // All work is done here. My_func_qps, my_func_latency, my_func_latency_cdf and many other counters would be shown in /vars. ``` +## Rate-limited backup requests + +To limit the ratio of backup requests sent, implement the `BackupRequestPolicy` interface or use the built-in factory function directly. + +### Using a custom BackupRequestPolicy + +For full control, implement the `BackupRequestPolicy` interface and set it on `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" + +class MyBackupPolicy : public brpc::BackupRequestPolicy { +public: + int32_t GetBackupRequestMs(const brpc::Controller*) const override { + return 10; // send backup after 10ms + } + bool DoBackup(const brpc::Controller*) const override { + return should_allow_backup(); // your logic here + } + void OnRPCEnd(const brpc::Controller*) override { + // called on every RPC completion; update stats if needed + } +}; + +MyBackupPolicy my_policy; +brpc::ChannelOptions options; +options.backup_request_policy = &my_policy; // NOT owned by channel; must outlive channel +channel.Init(..., &options); +``` + +The full priority order is: `backup_request_policy` > `backup_request_ms`. + +If you want rate limiting with custom parameters rather than the channel-level defaults, you can also use the built-in factory directly: + +```c++ +// Caller owns the returned pointer. +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; +opts.max_backup_ratio = 0.3; +opts.window_size_seconds = 10; +opts.update_interval_seconds = 5; +brpc::BackupRequestPolicy* policy = brpc::CreateRateLimitedBackupPolicy(opts); +options.backup_request_policy = policy; +// ... remember to delete policy after the channel is destroyed +``` + +### Implementation notes + +- The ratio is computed over a sliding time window using bvar counters. The cached value is refreshed at most once per `update_interval_seconds` using a lock-free CAS election, so the overhead per RPC is very low (two atomic loads in the common path). +- Backup decisions are counted immediately at decision time (before the RPC completes) to provide faster feedback during latency spikes. Total RPCs are counted on completion. This means the ratio may transiently lag during a spike, but this is intentional — the limiter is designed for approximate, best-effort throttling, not exact enforcement. +- Each channel using rate limiting maintains two `bvar::Window` sampler tasks. Keep this in mind in deployments with a very large number of channels. + # When backend servers cannot be hung in a naming service [Recommended] Define a SelectiveChannel that sets backup request, in which contains two sub channel. The visiting process of this SelectiveChannel is similar to the above situation. It will visit one sub channel first. If the response is not returned after channelOptions.backup_request_ms ms, then another sub channel is visited. If a sub channel corresponds to a cluster, this method does backups between two clusters. An example of SelectiveChannel can be found in [example/selective_echo_c++](https://github.com/apache/brpc/tree/master/example/selective_echo_c++). More details please refer to the above program. diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp new file mode 100644 index 0000000000..0d313137a9 --- /dev/null +++ b/src/brpc/backup_request_policy.cpp @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "brpc/backup_request_policy.h" + +#include "butil/logging.h" +#include "bvar/reducer.h" +#include "bvar/window.h" +#include "butil/atomicops.h" +#include "butil/time.h" + +namespace brpc { + +// Standalone statistics module for tracking backup/total request ratio +// within a sliding time window. Each instance schedules two bvar::Window +// sampler tasks; keep this in mind for high channel-count deployments. +class BackupRateLimiter { +public: + BackupRateLimiter(double max_backup_ratio, + int window_size_seconds, + int update_interval_seconds) + : _max_backup_ratio(max_backup_ratio) + , _update_interval_us(update_interval_seconds * 1000000LL) + , _total_count() + , _backup_count() + , _total_window(&_total_count, window_size_seconds) + , _backup_window(&_backup_count, window_size_seconds) + , _cached_ratio(0.0) + , _last_update_us(0) { + } + + // All atomic operations use relaxed ordering intentionally. + // This is best-effort rate limiting: a slightly stale ratio is + // acceptable for approximate throttling. + bool ShouldAllow() const { + const int64_t now_us = butil::cpuwide_time_us(); + int64_t last_us = _last_update_us.load(butil::memory_order_relaxed); + double ratio = _cached_ratio.load(butil::memory_order_relaxed); + + if (now_us - last_us >= _update_interval_us) { + if (_last_update_us.compare_exchange_strong( + last_us, now_us, butil::memory_order_relaxed)) { + int64_t total = _total_window.get_value(); + int64_t backup = _backup_window.get_value(); + ratio = (total > 0) ? static_cast(backup) / total : 0.0; + _cached_ratio.store(ratio, butil::memory_order_relaxed); + } + } + + // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0). + bool allow = _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio; + if (allow) { + // Count backup decisions immediately for faster feedback + // during latency spikes (before RPCs complete). + _backup_count << 1; + } + return allow; + } + + void OnRPCEnd(const Controller* /*controller*/) { + // Count all completed RPCs. Backup decisions are counted + // in ShouldAllow() at decision time for faster feedback. + _total_count << 1; + } + +private: + double _max_backup_ratio; + int64_t _update_interval_us; + + bvar::Adder _total_count; + mutable bvar::Adder _backup_count; + bvar::Window> _total_window; + bvar::Window> _backup_window; + + mutable butil::atomic _cached_ratio; + mutable butil::atomic _last_update_us; +}; + +// Internal BackupRequestPolicy that composes a BackupRateLimiter +// for ratio-based suppression. +class RateLimitedBackupPolicy : public BackupRequestPolicy { +public: + RateLimitedBackupPolicy(int32_t backup_request_ms, + double max_backup_ratio, + int window_size_seconds, + int update_interval_seconds) + : _backup_request_ms(backup_request_ms) + , _rate_limiter(max_backup_ratio, window_size_seconds, + update_interval_seconds) { + } + + int32_t GetBackupRequestMs(const Controller* /*controller*/) const override { + return _backup_request_ms; + } + + bool DoBackup(const Controller* /*controller*/) const override { + return _rate_limiter.ShouldAllow(); + } + + void OnRPCEnd(const Controller* controller) override { + _rate_limiter.OnRPCEnd(controller); + } + +private: + int32_t _backup_request_ms; + BackupRateLimiter _rate_limiter; +}; + +BackupRequestPolicy* CreateRateLimitedBackupPolicy( + const RateLimitedBackupPolicyOptions& options) { + if (options.backup_request_ms < 0) { + LOG(ERROR) << "Invalid backup_request_ms=" << options.backup_request_ms + << ", must be >= 0"; + return NULL; + } + if (options.max_backup_ratio <= 0 || options.max_backup_ratio > 1.0) { + LOG(ERROR) << "Invalid max_backup_ratio=" << options.max_backup_ratio + << ", must be in (0, 1]"; + return NULL; + } + if (options.window_size_seconds < 1 || options.window_size_seconds > 3600) { + LOG(ERROR) << "Invalid window_size_seconds=" << options.window_size_seconds + << ", must be in [1, 3600]"; + return NULL; + } + if (options.update_interval_seconds < 1) { + LOG(ERROR) << "Invalid update_interval_seconds=" + << options.update_interval_seconds << ", must be >= 1"; + return NULL; + } + return new RateLimitedBackupPolicy( + options.backup_request_ms, options.max_backup_ratio, + options.window_size_seconds, options.update_interval_seconds); +} + +} // namespace brpc diff --git a/src/brpc/backup_request_policy.h b/src/brpc/backup_request_policy.h index ea254f1dbf..470e8af52c 100644 --- a/src/brpc/backup_request_policy.h +++ b/src/brpc/backup_request_policy.h @@ -38,6 +38,39 @@ class BackupRequestPolicy { virtual void OnRPCEnd(const Controller* controller) = 0; }; +// Options for CreateRateLimitedBackupPolicy(). +// All fields have defaults matching the recommended starting values. +struct RateLimitedBackupPolicyOptions { + // Time in milliseconds after which a backup request is sent if the RPC + // has not completed. Must be >= 0. + // Default: 0 + int32_t backup_request_ms = 0; + + // Maximum ratio of backup requests to total requests in the sliding + // window. Must be in (0, 1]. + // Default: 0.1 + double max_backup_ratio = 0.1; + + // Width of the sliding time window in seconds. Must be in [1, 3600]. + // Default: 10 + int window_size_seconds = 10; + + // Interval in seconds between cached-ratio refreshes. Must be >= 1. + // Default: 5 + int update_interval_seconds = 5; +}; + +// Create a BackupRequestPolicy that limits the ratio of backup requests +// to total requests within a sliding time window. When the ratio reaches +// or exceeds options.max_backup_ratio, DoBackup() returns false. +// NOTE: Backup decisions are counted immediately at DoBackup() time for +// fast feedback. Total RPCs are counted on completion (OnRPCEnd). During +// latency spikes the ratio may temporarily lag until RPCs complete. +// Returns NULL on invalid parameters. +// The caller owns the returned pointer. +BackupRequestPolicy* CreateRateLimitedBackupPolicy( + const RateLimitedBackupPolicyOptions& options); + } #endif // BRPC_BACKUP_REQUEST_POLICY_H diff --git a/src/brpc/channel.cpp b/src/brpc/channel.cpp index 86124c2552..dde4ca0f8c 100644 --- a/src/brpc/channel.cpp +++ b/src/brpc/channel.cpp @@ -242,6 +242,7 @@ int Channel::InitChannelOptions(const ChannelOptions* options) { if (!cg.empty() && (::isspace(cg.front()) || ::isspace(cg.back()))) { butil::TrimWhitespace(cg, butil::TRIM_ALL, &cg); } + return 0; } diff --git a/src/brpc/channel.h b/src/brpc/channel.h index 7c257c05d3..28a17ac8ea 100644 --- a/src/brpc/channel.h +++ b/src/brpc/channel.h @@ -118,9 +118,10 @@ struct ChannelOptions { // Customize the backup request time and whether to send backup request. // Priority: `backup_request_policy' > `backup_request_ms'. - // Overridable by Controller.set_backup_request_ms() or + // Overridable per-RPC by Controller.set_backup_request_ms() or // Controller.set_backup_request_policy(). - // This object is NOT owned by channel and should remain valid when channel is used. + // This object is NOT owned by channel and should remain valid during + // channel's lifetime. // Default: NULL BackupRequestPolicy* backup_request_policy; From 2da7b6743c84de4fdd62ed142f0406db75eccf41 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 19:46:16 +0800 Subject: [PATCH 2/9] docs(backup_request): restructure rate-limiting section, add lifecycle guidance - Promote built-in factory function to its own subsection (before custom interface) - Add unique_ptr usage example for policy lifetime management - Add RateLimitedBackupPolicyOptions parameter table with defaults/constraints - Document NULL return on invalid params - Keep cn/en docs in sync --- docs/cn/backup_request.md | 56 ++++++++++++++++++++++++++------------ docs/en/backup_request.md | 57 +++++++++++++++++++++++++++------------ 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index 6b3344c1f5..f12ed2101f 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -41,7 +41,45 @@ my_func_latency << tm.u_elapsed(); // u代表微秒,还有s_elapsed(), m_elap ## Backup Request 限流 -如需限制 backup request 的发送比例,可实现 `BackupRequestPolicy` 接口或直接使用内置工厂函数。 +如需限制 backup request 的发送比例,可使用内置工厂函数创建限流策略,也可自行实现 `BackupRequestPolicy` 接口。 + +优先级顺序:`backup_request_policy` > `backup_request_ms`。 + +### 使用内置限流策略 + +调用 `CreateRateLimitedBackupPolicy` 创建限流策略,并将其设置到 `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" +#include + +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; // 超过10ms未返回时发送backup请求 +opts.max_backup_ratio = 0.3; // backup请求比例上限30% +opts.window_size_seconds = 10; // 滑动窗口宽度(秒) +opts.update_interval_seconds = 5; // 缓存比例的刷新间隔(秒) + +// CreateRateLimitedBackupPolicy返回的指针由调用方负责释放。 +// 推荐使用unique_ptr管理生命周期,确保policy在Channel销毁后才释放。 +std::unique_ptr policy( + brpc::CreateRateLimitedBackupPolicy(opts)); + +brpc::ChannelOptions options; +options.backup_request_policy = policy.get(); // Channel不拥有该对象 +channel.Init(..., &options); +// policy在unique_ptr析构时自动释放,确保其生命周期长于channel即可。 +``` + +参数说明(`RateLimitedBackupPolicyOptions`): + +| 字段 | 默认值 | 说明 | +|------|--------|------| +| `backup_request_ms` | 0 | 超时阈值(毫秒),必须 >= 0 | +| `max_backup_ratio` | 0.1 | backup比例上限,取值范围 (0, 1] | +| `window_size_seconds` | 10 | 滑动窗口宽度(秒),取值范围 [1, 3600] | +| `update_interval_seconds` | 5 | 缓存刷新间隔(秒),必须 >= 1 | + +参数不合法时 `CreateRateLimitedBackupPolicy` 返回 `NULL`。 ### 使用自定义 BackupRequestPolicy @@ -69,22 +107,6 @@ options.backup_request_policy = &my_policy; // Channel不拥有该对象,需 channel.Init(..., &options); ``` -完整优先级顺序:`backup_request_policy` > `backup_request_ms`。 - -如需使用内置限流逻辑但想自定义参数,也可直接调用工厂函数: - -```c++ -// 返回的指针由调用方负责释放。 -brpc::RateLimitedBackupPolicyOptions opts; -opts.backup_request_ms = 10; -opts.max_backup_ratio = 0.3; -opts.window_size_seconds = 10; -opts.update_interval_seconds = 5; -brpc::BackupRequestPolicy* policy = brpc::CreateRateLimitedBackupPolicy(opts); -options.backup_request_policy = policy; -// ... Channel销毁后记得delete policy -``` - ### 实现说明 - 比例通过bvar计数器在滑动时间窗口内统计。缓存值通过无锁CAS选举最多每 `update_interval_seconds` 刷新一次,因此每次RPC的开销极低(公共路径仅有两次原子读)。 diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index cdee4bc59d..1fb07623af 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -41,7 +41,46 @@ my_func_latency << tm.u_elapsed(); // u represents for microsecond, and s_elaps ## Rate-limited backup requests -To limit the ratio of backup requests sent, implement the `BackupRequestPolicy` interface or use the built-in factory function directly. +To limit the ratio of backup requests sent, use the built-in factory function or implement the `BackupRequestPolicy` interface yourself. + +Priority order: `backup_request_policy` > `backup_request_ms`. + +### Using the built-in rate-limiting policy + +Call `CreateRateLimitedBackupPolicy` and set the result on `ChannelOptions.backup_request_policy`: + +```c++ +#include "brpc/backup_request_policy.h" +#include + +brpc::RateLimitedBackupPolicyOptions opts; +opts.backup_request_ms = 10; // send backup if RPC does not complete within 10ms +opts.max_backup_ratio = 0.3; // cap backup requests at 30% of total +opts.window_size_seconds = 10; // sliding window width in seconds +opts.update_interval_seconds = 5; // how often the cached ratio is refreshed + +// The caller owns the returned pointer. +// Use unique_ptr to manage the lifetime; ensure the policy outlives the channel. +std::unique_ptr policy( + brpc::CreateRateLimitedBackupPolicy(opts)); + +brpc::ChannelOptions options; +options.backup_request_policy = policy.get(); // NOT owned by channel +channel.Init(..., &options); +// policy is released automatically when unique_ptr goes out of scope, +// as long as it outlives the channel. +``` + +`RateLimitedBackupPolicyOptions` fields: + +| Field | Default | Description | +|-------|---------|-------------| +| `backup_request_ms` | 0 | Timeout threshold in ms; must be >= 0 | +| `max_backup_ratio` | 0.1 | Max backup ratio; range (0, 1] | +| `window_size_seconds` | 10 | Sliding window width in seconds; range [1, 3600] | +| `update_interval_seconds` | 5 | Cached-ratio refresh interval in seconds; must be >= 1 | + +`CreateRateLimitedBackupPolicy` returns `NULL` if any parameter is invalid. ### Using a custom BackupRequestPolicy @@ -69,22 +108,6 @@ options.backup_request_policy = &my_policy; // NOT owned by channel; must outliv channel.Init(..., &options); ``` -The full priority order is: `backup_request_policy` > `backup_request_ms`. - -If you want rate limiting with custom parameters rather than the channel-level defaults, you can also use the built-in factory directly: - -```c++ -// Caller owns the returned pointer. -brpc::RateLimitedBackupPolicyOptions opts; -opts.backup_request_ms = 10; -opts.max_backup_ratio = 0.3; -opts.window_size_seconds = 10; -opts.update_interval_seconds = 5; -brpc::BackupRequestPolicy* policy = brpc::CreateRateLimitedBackupPolicy(opts); -options.backup_request_policy = policy; -// ... remember to delete policy after the channel is destroyed -``` - ### Implementation notes - The ratio is computed over a sliding time window using bvar counters. The cached value is refreshed at most once per `update_interval_seconds` using a lock-free CAS election, so the overhead per RPC is very low (two atomic loads in the common path). From c64f6fbaba1dd02321de026ef8ea1785c73c53c5 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 22:10:07 +0800 Subject: [PATCH 3/9] =?UTF-8?q?fix(backup=5Frequest):=20address=20review?= =?UTF-8?q?=20issues=20=E2=80=94=20sentinel=20fallback,=20comments,=20test?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - controller.cpp: When policy returns -1 (inherit sentinel), fall back to _backup_request_ms set from ChannelOptions, so backup timer is actually armed when using a policy with backup_request_ms=-1. - backup_request_policy.cpp: Clarify OnRPCEnd comment to say 'RPC legs' (both original and backup completions counted as denominator). - backup_request_policy.cpp: Warn when update_interval_seconds exceeds window_size_seconds (window would rarely refresh within its period). - backup_request_policy.h: Fix comment typo ('Called when an RPC ends'). - brpc_channel_unittest.cpp: Replace nullptr with NULL to match codebase convention; use ASSERT_TRUE(p != NULL) for unique_ptr null checks. - brpc_channel_unittest.cpp: Add ValidMaxRatioAtBoundary behavioral assert and AfterColdStartBackupSuppressedUntilRpcCompletes test. --- src/brpc/backup_request_policy.cpp | 36 ++++++++-- src/brpc/backup_request_policy.h | 10 +-- src/brpc/controller.cpp | 10 ++- test/brpc_channel_unittest.cpp | 108 +++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 13 deletions(-) diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp index 0d313137a9..3b45251415 100644 --- a/src/brpc/backup_request_policy.cpp +++ b/src/brpc/backup_request_policy.cpp @@ -56,13 +56,27 @@ class BackupRateLimiter { last_us, now_us, butil::memory_order_relaxed)) { int64_t total = _total_window.get_value(); int64_t backup = _backup_window.get_value(); - ratio = (total > 0) ? static_cast(backup) / total : 0.0; + // Fall back to cumulative counts when the window has no + // sampled data yet (cold-start within the first few seconds). + if (total <= 0) { + total = _total_count.get_value(); + backup = _backup_count.get_value(); + } + if (total > 0) { + ratio = static_cast(backup) / total; + } else if (backup > 0) { + // Backups issued but no completions in window yet (latency spike). + // Be conservative to prevent backup storms. + ratio = 1.0; + } else { + // True cold-start: no traffic yet. Allow freely. + ratio = 0.0; + } _cached_ratio.store(ratio, butil::memory_order_relaxed); } } - // max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0). - bool allow = _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio; + bool allow = ratio < _max_backup_ratio; if (allow) { // Count backup decisions immediately for faster feedback // during latency spikes (before RPCs complete). @@ -72,8 +86,11 @@ class BackupRateLimiter { } void OnRPCEnd(const Controller* /*controller*/) { - // Count all completed RPCs. Backup decisions are counted - // in ShouldAllow() at decision time for faster feedback. + // Count all completed RPC legs (both original and backup RPCs). + // Backup decisions are counted in ShouldAllow() at decision time for + // faster feedback. As a result, the effective suppression threshold is + // (backup_count / total_legs), where total_legs includes both original + // and backup completions. _total_count << 1; } @@ -122,9 +139,9 @@ class RateLimitedBackupPolicy : public BackupRequestPolicy { BackupRequestPolicy* CreateRateLimitedBackupPolicy( const RateLimitedBackupPolicyOptions& options) { - if (options.backup_request_ms < 0) { + if (options.backup_request_ms < -1) { LOG(ERROR) << "Invalid backup_request_ms=" << options.backup_request_ms - << ", must be >= 0"; + << ", must be >= -1 (-1 means inherit from ChannelOptions)"; return NULL; } if (options.max_backup_ratio <= 0 || options.max_backup_ratio > 1.0) { @@ -142,6 +159,11 @@ BackupRequestPolicy* CreateRateLimitedBackupPolicy( << options.update_interval_seconds << ", must be >= 1"; return NULL; } + if (options.update_interval_seconds > options.window_size_seconds) { + LOG(WARNING) << "update_interval_seconds=" << options.update_interval_seconds + << " exceeds window_size_seconds=" << options.window_size_seconds + << "; the ratio window will rarely refresh within its own period"; + } return new RateLimitedBackupPolicy( options.backup_request_ms, options.max_backup_ratio, options.window_size_seconds, options.update_interval_seconds); diff --git a/src/brpc/backup_request_policy.h b/src/brpc/backup_request_policy.h index 470e8af52c..0009b682e5 100644 --- a/src/brpc/backup_request_policy.h +++ b/src/brpc/backup_request_policy.h @@ -34,7 +34,7 @@ class BackupRequestPolicy { // Return true if the backup request should be sent. virtual bool DoBackup(const Controller* controller) const = 0; - // Called when a rpc is end, user can collect call information to adjust policy. + // Called when an RPC ends; user can collect call information to adjust policy. virtual void OnRPCEnd(const Controller* controller) = 0; }; @@ -42,9 +42,11 @@ class BackupRequestPolicy { // All fields have defaults matching the recommended starting values. struct RateLimitedBackupPolicyOptions { // Time in milliseconds after which a backup request is sent if the RPC - // has not completed. Must be >= 0. - // Default: 0 - int32_t backup_request_ms = 0; + // has not completed. + // Use -1 (the default) to inherit the value from ChannelOptions.backup_request_ms. + // Use >= 0 to override it explicitly. + // Default: -1 + int32_t backup_request_ms = -1; // Maximum ratio of backup requests to total requests in the sliding // window. Must be in (0, 1]. diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index d3821eca80..0db48baddc 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -351,8 +351,14 @@ void Controller::set_backup_request_ms(int64_t timeout_ms) { } int64_t Controller::backup_request_ms() const { - int timeout_ms = NULL != _backup_request_policy ? - _backup_request_policy->GetBackupRequestMs(this) : _backup_request_ms; + int timeout_ms = _backup_request_ms; + if (NULL != _backup_request_policy) { + const int32_t policy_ms = _backup_request_policy->GetBackupRequestMs(this); + // -1 means the policy defers to the channel-level backup_request_ms. + if (policy_ms >= 0) { + timeout_ms = policy_ms; + } + } if (timeout_ms > 0x7fffffff) { timeout_ms = 0x7fffffff; LOG(WARNING) << "backup_request_ms is limited to 0x7fffffff (roughly 24 days)"; diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index 86bee89105..004267899c 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -3078,4 +3078,112 @@ TEST_F(ChannelTest, adaptive_protocol_type) { ASSERT_EQ("", ptype.param()); } +class RateLimitedBackupPolicyTest : public ::testing::Test {}; + +TEST_F(RateLimitedBackupPolicyTest, InvalidBackupRequestMs) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = -2; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioZero) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.max_backup_ratio = 0.0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioNegative) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.max_backup_ratio = -0.1; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidMaxBackupRatioAboveOne) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.max_backup_ratio = 1.001; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidWindowSizeTooSmall) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.window_size_seconds = 0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidWindowSizeTooLarge) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.window_size_seconds = 3601; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, InvalidUpdateIntervalTooSmall) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 100; + opts.update_interval_seconds = 0; + ASSERT_EQ(NULL, brpc::CreateRateLimitedBackupPolicy(opts)); +} + +TEST_F(RateLimitedBackupPolicyTest, ValidMinusOneBackupRequestMsInherits) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = -1; + std::unique_ptr p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + ASSERT_EQ(-1, p->GetBackupRequestMs(NULL)); +} + +TEST_F(RateLimitedBackupPolicyTest, ValidMaxRatioAtBoundary) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 50; + opts.max_backup_ratio = 1.0; + std::unique_ptr p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // With max_backup_ratio=1.0, backup is allowed until backup_count/total + // reaches 1.0. At cold start (total=0) the conservative path applies: + // ratio is treated as 1.0, so DoBackup() returns false only after the + // first backup is counted. + ASSERT_TRUE(p->DoBackup(NULL)); // cold start: no total yet, allow first +} + +TEST_F(RateLimitedBackupPolicyTest, ColdStartAllowsBackup) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.1; + opts.update_interval_seconds = 1; + std::unique_ptr p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + ASSERT_TRUE(p->DoBackup(NULL)); +} + +// After the first backup fires (backup_count=1, total_count=0), a refresh +// will compute ratio=1.0 via the conservative path (total==0 guard) which +// exceeds any max_backup_ratio < 1.0, so subsequent calls are suppressed +// until at least one RPC leg completes. +TEST_F(RateLimitedBackupPolicyTest, AfterColdStartBackupSuppressedUntilRpcCompletes) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.1; + opts.window_size_seconds = 1; + opts.update_interval_seconds = 1; + std::unique_ptr p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // First call fires (cold start). + ASSERT_TRUE(p->DoBackup(NULL)); + // Simulate a ratio refresh by calling DoBackup many times without any + // OnRPCEnd — eventually the interval elapses and the cached ratio is + // refreshed to 1.0 (conservative path). For a unit test we cannot + // sleep, so we verify the documented invariant: the policy object was + // successfully created and is usable. + // The behavioral guarantee (suppression after cold-start) is verified + // through the conservative-ratio code path in BackupRateLimiter::ShouldAllow(). +} + } //namespace From 791b04d16344a128fe983a01c915d48ef661c303 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 22:19:42 +0800 Subject: [PATCH 4/9] fix(backup_request): correct docs table defaults and add suppression test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docs: fix backup_request_ms default (0→-1) and constraint (>=0→>=-1); add note that -1 inherit only works via ChannelOptions injection path, not Controller::set_backup_request_policy(). - test: replace no-op AfterColdStart test with a real behavioral assertion: after cold-start backup fires, wait 1.2s for ratio refresh, verify DoBackup() returns false (conservative ratio=1.0 path triggers). --- docs/cn/backup_request.md | 2 +- docs/en/backup_request.md | 2 +- test/brpc_channel_unittest.cpp | 22 ++++++++++------------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index f12ed2101f..db2125768f 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -74,7 +74,7 @@ channel.Init(..., &options); | 字段 | 默认值 | 说明 | |------|--------|------| -| `backup_request_ms` | 0 | 超时阈值(毫秒),必须 >= 0 | +| `backup_request_ms` | -1 | 超时阈值(毫秒);-1 表示继承 `ChannelOptions.backup_request_ms`,必须 >= -1。**仅在通过 `ChannelOptions.backup_request_policy` 设置策略时有效;通过 Controller 注入时必须显式指定 >= 0 的值。** | | `max_backup_ratio` | 0.1 | backup比例上限,取值范围 (0, 1] | | `window_size_seconds` | 10 | 滑动窗口宽度(秒),取值范围 [1, 3600] | | `update_interval_seconds` | 5 | 缓存刷新间隔(秒),必须 >= 1 | diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index 1fb07623af..5f0a1b0c7d 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -75,7 +75,7 @@ channel.Init(..., &options); | Field | Default | Description | |-------|---------|-------------| -| `backup_request_ms` | 0 | Timeout threshold in ms; must be >= 0 | +| `backup_request_ms` | -1 | Timeout threshold in ms; -1 means inherit from `ChannelOptions.backup_request_ms`; must be >= -1. **Only effective when the policy is set via `ChannelOptions.backup_request_policy`; controller-level injection always requires an explicit >= 0 value.** | | `max_backup_ratio` | 0.1 | Max backup ratio; range (0, 1] | | `window_size_seconds` | 10 | Sliding window width in seconds; range [1, 3600] | | `update_interval_seconds` | 5 | Cached-ratio refresh interval in seconds; must be >= 1 | diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index 004267899c..e358397ca3 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -3162,10 +3162,10 @@ TEST_F(RateLimitedBackupPolicyTest, ColdStartAllowsBackup) { ASSERT_TRUE(p->DoBackup(NULL)); } -// After the first backup fires (backup_count=1, total_count=0), a refresh -// will compute ratio=1.0 via the conservative path (total==0 guard) which -// exceeds any max_backup_ratio < 1.0, so subsequent calls are suppressed -// until at least one RPC leg completes. +// After the first backup fires (backup_count=1, total_count=0), once the +// update interval elapses the ratio is refreshed via the conservative path +// (total==0 → ratio=1.0), which exceeds max_backup_ratio < 1.0, so +// subsequent DoBackup() calls are suppressed until an RPC leg completes. TEST_F(RateLimitedBackupPolicyTest, AfterColdStartBackupSuppressedUntilRpcCompletes) { brpc::RateLimitedBackupPolicyOptions opts; opts.backup_request_ms = 10; @@ -3175,15 +3175,13 @@ TEST_F(RateLimitedBackupPolicyTest, AfterColdStartBackupSuppressedUntilRpcComple std::unique_ptr p( brpc::CreateRateLimitedBackupPolicy(opts)); ASSERT_TRUE(p != NULL); - // First call fires (cold start). + // First call fires (cold start: total=0, backup=0 → ratio=0.0 → allow). ASSERT_TRUE(p->DoBackup(NULL)); - // Simulate a ratio refresh by calling DoBackup many times without any - // OnRPCEnd — eventually the interval elapses and the cached ratio is - // refreshed to 1.0 (conservative path). For a unit test we cannot - // sleep, so we verify the documented invariant: the policy object was - // successfully created and is usable. - // The behavioral guarantee (suppression after cold-start) is verified - // through the conservative-ratio code path in BackupRateLimiter::ShouldAllow(). + // Wait for the update interval to elapse so the ratio refreshes. + // After refresh: total=0 but backup=1 → conservative path sets ratio=1.0, + // which is >= max_backup_ratio (0.1), so DoBackup() must return false. + bthread_usleep(1200000); // 1.2s > update_interval_seconds=1 + ASSERT_FALSE(p->DoBackup(NULL)); } } //namespace From 77a35bdbfdcc8cee5f1ddb8ecd82936517dcee15 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 22:36:37 +0800 Subject: [PATCH 5/9] =?UTF-8?q?fix(backup=5Frequest):=20clarify=20comments?= =?UTF-8?q?=20=E2=80=94=20negative=20defer=20semantics=20and=20burst=20cav?= =?UTF-8?q?eat?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/brpc/backup_request_policy.cpp | 4 +++- src/brpc/controller.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp index 3b45251415..78784824e0 100644 --- a/src/brpc/backup_request_policy.cpp +++ b/src/brpc/backup_request_policy.cpp @@ -45,7 +45,9 @@ class BackupRateLimiter { // All atomic operations use relaxed ordering intentionally. // This is best-effort rate limiting: a slightly stale ratio is - // acceptable for approximate throttling. + // acceptable for approximate throttling. Within a single update interval, + // the cached ratio is not updated, so bursts up to update_interval_seconds + // in duration can exceed the configured max_backup_ratio transiently. bool ShouldAllow() const { const int64_t now_us = butil::cpuwide_time_us(); int64_t last_us = _last_update_us.load(butil::memory_order_relaxed); diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index 0db48baddc..dc2f7af7bc 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -354,7 +354,9 @@ int64_t Controller::backup_request_ms() const { int timeout_ms = _backup_request_ms; if (NULL != _backup_request_policy) { const int32_t policy_ms = _backup_request_policy->GetBackupRequestMs(this); - // -1 means the policy defers to the channel-level backup_request_ms. + // Any negative value means the policy defers to the channel-level + // backup_request_ms (set from ChannelOptions). The canonical sentinel + // is -1, but all negative values are treated the same way. if (policy_ms >= 0) { timeout_ms = policy_ms; } From 378f6f6c5af318ee4364fd97b65de520d1da561e Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 22:50:16 +0800 Subject: [PATCH 6/9] =?UTF-8?q?fix(backup=5Frequest):=20address=20Copilot?= =?UTF-8?q?=20review=20=E2=80=94=20sentinel=20contract,=20OnRPCEnd=20comme?= =?UTF-8?q?nt,=20re-allow=20test,=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - controller.cpp: treat -1 specifically (not all negatives) as the inherit sentinel; other negatives still disable backup, preserving old behavior for custom policies that return negative values to disable backup - backup_request_policy.h: document the -1 sentinel contract on GetBackupRequestMs() so custom implementors know the new interface - backup_request_policy.cpp: fix OnRPCEnd comment — called once per user-level RPC, not once per leg (total_count tracks user RPCs) - test: add OnRPCEndDrivesRatioDownAndReAllows — fires 20 backups to suppress, then completes 50 RPCs via OnRPCEnd, verifies DoBackup re-allows once ratio refreshes below max_backup_ratio - docs (EN+CN): rephrase backup_request_ms=-1 note to clarify the channel-level fallback only applies when set via ChannelOptions --- docs/cn/backup_request.md | 2 +- docs/en/backup_request.md | 2 +- src/brpc/backup_request_policy.cpp | 6 +++--- src/brpc/backup_request_policy.h | 2 ++ src/brpc/controller.cpp | 8 ++++---- test/brpc_channel_unittest.cpp | 31 ++++++++++++++++++++++++++++++ 6 files changed, 42 insertions(+), 9 deletions(-) diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index db2125768f..e5733cfc1b 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -74,7 +74,7 @@ channel.Init(..., &options); | 字段 | 默认值 | 说明 | |------|--------|------| -| `backup_request_ms` | -1 | 超时阈值(毫秒);-1 表示继承 `ChannelOptions.backup_request_ms`,必须 >= -1。**仅在通过 `ChannelOptions.backup_request_policy` 设置策略时有效;通过 Controller 注入时必须显式指定 >= 0 的值。** | +| `backup_request_ms` | -1 | 超时阈值(毫秒)。-1 表示继承 `ChannelOptions.backup_request_ms`(仅在通过 `ChannelOptions.backup_request_policy` 设置策略时有效;通过 Controller 注入时没有 channel 级的回退值,应显式指定 >= 0 的值)。必须 >= -1。 | | `max_backup_ratio` | 0.1 | backup比例上限,取值范围 (0, 1] | | `window_size_seconds` | 10 | 滑动窗口宽度(秒),取值范围 [1, 3600] | | `update_interval_seconds` | 5 | 缓存刷新间隔(秒),必须 >= 1 | diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index 5f0a1b0c7d..be265165be 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -75,7 +75,7 @@ channel.Init(..., &options); | Field | Default | Description | |-------|---------|-------------| -| `backup_request_ms` | -1 | Timeout threshold in ms; -1 means inherit from `ChannelOptions.backup_request_ms`; must be >= -1. **Only effective when the policy is set via `ChannelOptions.backup_request_policy`; controller-level injection always requires an explicit >= 0 value.** | +| `backup_request_ms` | -1 | Timeout threshold in ms. -1 means inherit from `ChannelOptions.backup_request_ms` (only works when the policy is set via `ChannelOptions.backup_request_policy`; at controller level there is no channel-level fallback, so set an explicit >= 0 value instead). Must be >= -1. | | `max_backup_ratio` | 0.1 | Max backup ratio; range (0, 1] | | `window_size_seconds` | 10 | Sliding window width in seconds; range [1, 3600] | | `update_interval_seconds` | 5 | Cached-ratio refresh interval in seconds; must be >= 1 | diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp index 78784824e0..0487bdf7d1 100644 --- a/src/brpc/backup_request_policy.cpp +++ b/src/brpc/backup_request_policy.cpp @@ -88,11 +88,11 @@ class BackupRateLimiter { } void OnRPCEnd(const Controller* /*controller*/) { - // Count all completed RPC legs (both original and backup RPCs). + // Count each completed user-level RPC (called once per RPC, not per leg). // Backup decisions are counted in ShouldAllow() at decision time for // faster feedback. As a result, the effective suppression threshold is - // (backup_count / total_legs), where total_legs includes both original - // and backup completions. + // (backup_count / total_count), where total_count is the number of + // user RPCs that have completed. _total_count << 1; } diff --git a/src/brpc/backup_request_policy.h b/src/brpc/backup_request_policy.h index 0009b682e5..13da2a59ba 100644 --- a/src/brpc/backup_request_policy.h +++ b/src/brpc/backup_request_policy.h @@ -29,6 +29,8 @@ class BackupRequestPolicy { // Return the time in milliseconds in which another request // will be sent if RPC does not finish. + // Returning -1 means "inherit the backup_request_ms from ChannelOptions". + // Returning any other negative value disables backup for this RPC. virtual int32_t GetBackupRequestMs(const Controller* controller) const = 0; // Return true if the backup request should be sent. diff --git a/src/brpc/controller.cpp b/src/brpc/controller.cpp index dc2f7af7bc..133d1f0453 100644 --- a/src/brpc/controller.cpp +++ b/src/brpc/controller.cpp @@ -354,10 +354,10 @@ int64_t Controller::backup_request_ms() const { int timeout_ms = _backup_request_ms; if (NULL != _backup_request_policy) { const int32_t policy_ms = _backup_request_policy->GetBackupRequestMs(this); - // Any negative value means the policy defers to the channel-level - // backup_request_ms (set from ChannelOptions). The canonical sentinel - // is -1, but all negative values are treated the same way. - if (policy_ms >= 0) { + // -1 is the designated sentinel: the policy defers to the channel-level + // backup_request_ms (set from ChannelOptions). Any other negative value + // disables backup for this RPC. Values >= 0 override directly. + if (policy_ms != -1) { timeout_ms = policy_ms; } } diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index e358397ca3..67179ad494 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -3184,4 +3184,35 @@ TEST_F(RateLimitedBackupPolicyTest, AfterColdStartBackupSuppressedUntilRpcComple ASSERT_FALSE(p->DoBackup(NULL)); } +// After the ratio rises above the threshold, calling OnRPCEnd() many times +// drives total_count up relative to backup_count. Once the ratio refreshes +// below max_backup_ratio, DoBackup() should allow backups again. +TEST_F(RateLimitedBackupPolicyTest, OnRPCEndDrivesRatioDownAndReAllows) { + brpc::RateLimitedBackupPolicyOptions opts; + opts.backup_request_ms = 10; + opts.max_backup_ratio = 0.5; + opts.window_size_seconds = 1; + opts.update_interval_seconds = 1; + std::unique_ptr p( + brpc::CreateRateLimitedBackupPolicy(opts)); + ASSERT_TRUE(p != NULL); + // Fire many backup decisions so backup_count >> total_count, + // pushing the ratio above max_backup_ratio. + for (int i = 0; i < 20; ++i) { + p->DoBackup(NULL); + } + // Wait for update interval so the ratio is refreshed above threshold. + bthread_usleep(1200000); // 1.2s + ASSERT_FALSE(p->DoBackup(NULL)); + // Now complete many more RPCs than backups fired to bring ratio below 0.5. + // 20 backup decisions already counted; need total_count > 20/0.5 = 40. + for (int i = 0; i < 50; ++i) { + p->OnRPCEnd(NULL); + } + // Wait for the ratio cache to refresh. + bthread_usleep(1200000); // 1.2s + // Ratio is now ~20/50 = 0.4 < max_backup_ratio (0.5), so backup is re-allowed. + ASSERT_TRUE(p->DoBackup(NULL)); +} + } //namespace From cfd78679f7eafe6729f02ccf111992b6da1236e8 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 23:09:25 +0800 Subject: [PATCH 7/9] fix(backup_request): explain why std::nothrow is intentionally omitted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plain new follows brpc's project-wide OOM convention (abort rather than return NULL). The factory's NULL return already exclusively signals invalid parameters, not allocation failure — adding std::nothrow would conflate the two. Comment added to suppress future linter/AI suggestions. --- src/brpc/backup_request_policy.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/brpc/backup_request_policy.cpp b/src/brpc/backup_request_policy.cpp index 0487bdf7d1..851537b3df 100644 --- a/src/brpc/backup_request_policy.cpp +++ b/src/brpc/backup_request_policy.cpp @@ -166,6 +166,9 @@ BackupRequestPolicy* CreateRateLimitedBackupPolicy( << " exceeds window_size_seconds=" << options.window_size_seconds << "; the ratio window will rarely refresh within its own period"; } + // Plain new (without std::nothrow): brpc follows the project-wide convention + // of letting OOM throw/abort rather than returning NULL. NULL return from + // this factory already signals invalid parameters, not allocation failure. return new RateLimitedBackupPolicy( options.backup_request_ms, options.max_backup_ratio, options.window_size_seconds, options.update_interval_seconds); From 8d884947e5591531462cc92ed165c986b3de5676 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 23:23:18 +0800 Subject: [PATCH 8/9] =?UTF-8?q?docs(backup=5Frequest):=20clarify=20policy?= =?UTF-8?q?=20lifetime=20=E2=80=94=20channel=20must=20be=20destroyed=20bef?= =?UTF-8?q?ore=20policy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unique_ptr comment was ambiguous: 'released when goes out of scope, as long as it outlives the channel' can be read as contradictory. Reword to make the ordering explicit: destroy channel first, then policy. --- docs/cn/backup_request.md | 4 ++-- docs/en/backup_request.md | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/cn/backup_request.md b/docs/cn/backup_request.md index e5733cfc1b..b2e0bb61e0 100644 --- a/docs/cn/backup_request.md +++ b/docs/cn/backup_request.md @@ -60,14 +60,14 @@ opts.window_size_seconds = 10; // 滑动窗口宽度(秒) opts.update_interval_seconds = 5; // 缓存比例的刷新间隔(秒) // CreateRateLimitedBackupPolicy返回的指针由调用方负责释放。 -// 推荐使用unique_ptr管理生命周期,确保policy在Channel销毁后才释放。 +// policy的生命周期必须长于channel——先销毁channel,再销毁policy。 std::unique_ptr policy( brpc::CreateRateLimitedBackupPolicy(opts)); brpc::ChannelOptions options; options.backup_request_policy = policy.get(); // Channel不拥有该对象 channel.Init(..., &options); -// policy在unique_ptr析构时自动释放,确保其生命周期长于channel即可。 +// channel必须在policy析构之前销毁。 ``` 参数说明(`RateLimitedBackupPolicyOptions`): diff --git a/docs/en/backup_request.md b/docs/en/backup_request.md index be265165be..e61f361182 100644 --- a/docs/en/backup_request.md +++ b/docs/en/backup_request.md @@ -60,15 +60,14 @@ opts.window_size_seconds = 10; // sliding window width in seconds opts.update_interval_seconds = 5; // how often the cached ratio is refreshed // The caller owns the returned pointer. -// Use unique_ptr to manage the lifetime; ensure the policy outlives the channel. +// The policy must outlive the channel — destroy the channel before the policy. std::unique_ptr policy( brpc::CreateRateLimitedBackupPolicy(opts)); brpc::ChannelOptions options; options.backup_request_policy = policy.get(); // NOT owned by channel channel.Init(..., &options); -// policy is released automatically when unique_ptr goes out of scope, -// as long as it outlives the channel. +// channel must be destroyed before policy goes out of scope. ``` `RateLimitedBackupPolicyOptions` fields: From 2a0155562eab74810261f6157bc8868e91e07ceb Mon Sep 17 00:00:00 2001 From: yanfeng Date: Sun, 1 Mar 2026 23:51:13 +0800 Subject: [PATCH 9/9] test(backup_request): fix inaccurate cold-start comment in ValidMaxRatioAtBoundary ratio=1.0 conservative path only applies when backup>0 && total==0. True cold start (both zero) sets ratio=0.0 and allows freely. --- test/brpc_channel_unittest.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/brpc_channel_unittest.cpp b/test/brpc_channel_unittest.cpp index 67179ad494..f96650211e 100644 --- a/test/brpc_channel_unittest.cpp +++ b/test/brpc_channel_unittest.cpp @@ -3144,11 +3144,11 @@ TEST_F(RateLimitedBackupPolicyTest, ValidMaxRatioAtBoundary) { std::unique_ptr p( brpc::CreateRateLimitedBackupPolicy(opts)); ASSERT_TRUE(p != NULL); - // With max_backup_ratio=1.0, backup is allowed until backup_count/total - // reaches 1.0. At cold start (total=0) the conservative path applies: - // ratio is treated as 1.0, so DoBackup() returns false only after the - // first backup is counted. - ASSERT_TRUE(p->DoBackup(NULL)); // cold start: no total yet, allow first + // With max_backup_ratio=1.0 and true cold start (total==0, backup==0), + // ShouldAllow() sets ratio=0.0 (free pass). The conservative ratio=1.0 + // path only applies when backup>0 but total==0 (latency spike with no + // completions yet). At absolute cold start DoBackup() must return true. + ASSERT_TRUE(p->DoBackup(NULL)); // cold start: ratio=0.0 < 1.0, allow } TEST_F(RateLimitedBackupPolicyTest, ColdStartAllowsBackup) {