Skip to content

Commit 27fcaee

Browse files
feng-y and claude committed
feat(backup_request): add rate-limited backup request policy (#3228)
Add ratio-based rate limiting for backup requests to prevent backup request storms under high QPS or downstream latency spikes.

Design:
- BackupRequestPolicy interface unchanged (ABI stable)
- BackupRateLimiter: standalone statistics module tracking backup/total ratio within a sliding time window using bvar counters
- RateLimitedBackupPolicy: internal implementation composing BackupRateLimiter, hidden in .cpp
- CreateRateLimitedBackupPolicy() factory function in header
- ChannelOptions.backup_request_max_ratio: per-channel configuration. Priority: backup_request_policy > backup_request_max_ratio > backup_request_ms
- Channel auto-creates an internal policy when max_ratio > 0 and no user policy is set; ownership of that policy is tracked by the _owns_backup_policy flag for cleanup
- 3 gflags with validators: backup_request_max_ratio, backup_request_ratio_window_size_s, backup_request_ratio_update_interval_s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent aa784b8 commit 27fcaee

File tree

5 files changed

+380
-4
lines changed

5 files changed

+380
-4
lines changed

src/brpc/backup_request_policy.cpp

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "brpc/backup_request_policy.h"
19+
20+
#include <gflags/gflags.h>
21+
#include "brpc/reloadable_flags.h"
22+
#include "bvar/reducer.h"
23+
#include "bvar/window.h"
24+
#include "butil/atomicops.h"
25+
#include "butil/time.h"
26+
27+
namespace brpc {
28+
29+
DEFINE_double(backup_request_max_ratio, -1,
30+
"Maximum ratio of backup requests to total requests. "
31+
"Value in (0, 1]. -1 means no limit (default). Can be overridden "
32+
"per-channel via ChannelOptions.backup_request_max_ratio. "
33+
"Note: takes effect at Channel::Init() time; changing this flag "
34+
"at runtime does not affect already-created channels.");
35+
36+
static bool validate_backup_request_max_ratio(const char*, double v) {
37+
if (v <= 0) return true; // non-positive means disabled
38+
if (v <= 1.0) return true;
39+
LOG(ERROR) << "Invalid backup_request_max_ratio=" << v
40+
<< ", must be <= 0 (disabled) or in (0, 1]";
41+
return false;
42+
}
43+
BRPC_VALIDATE_GFLAG(backup_request_max_ratio,
44+
validate_backup_request_max_ratio);
45+
46+
DEFINE_int32(backup_request_ratio_window_size_s, 10,
47+
"Window size in seconds for computing the backup request ratio. "
48+
"Must be >= 1.");
49+
50+
static bool validate_backup_request_ratio_window_size_s(
51+
const char*, int32_t v) {
52+
if (v >= 1) return true;
53+
LOG(ERROR) << "Invalid backup_request_ratio_window_size_s=" << v
54+
<< ", must be >= 1";
55+
return false;
56+
}
57+
BRPC_VALIDATE_GFLAG(backup_request_ratio_window_size_s,
58+
validate_backup_request_ratio_window_size_s);
59+
60+
DEFINE_int32(backup_request_ratio_update_interval_s, 5,
61+
"Interval in seconds between ratio cache updates. Must be >= 1.");
62+
63+
static bool validate_backup_request_ratio_update_interval_s(
64+
const char*, int32_t v) {
65+
if (v >= 1) return true;
66+
LOG(ERROR) << "Invalid backup_request_ratio_update_interval_s=" << v
67+
<< ", must be >= 1";
68+
return false;
69+
}
70+
BRPC_VALIDATE_GFLAG(backup_request_ratio_update_interval_s,
71+
validate_backup_request_ratio_update_interval_s);
72+
73+
// Standalone statistics module for tracking backup/total request ratio
74+
// within a sliding time window.
75+
class BackupRateLimiter {
76+
public:
77+
BackupRateLimiter(double max_backup_ratio,
78+
int window_size_seconds,
79+
int update_interval_seconds)
80+
: _max_backup_ratio(max_backup_ratio)
81+
, _update_interval_us(update_interval_seconds * 1000000LL)
82+
, _total_window(&_total_count, window_size_seconds)
83+
, _backup_window(&_backup_count, window_size_seconds)
84+
, _cached_ratio(0.0)
85+
, _last_update_us(0) {
86+
}
87+
88+
// All atomic operations use relaxed ordering intentionally.
89+
// This is best-effort rate limiting: a slightly stale ratio is
90+
// acceptable for approximate throttling.
91+
bool ShouldAllow() const {
92+
const int64_t now_us = butil::cpuwide_time_us();
93+
int64_t last_us = _last_update_us.load(butil::memory_order_relaxed);
94+
double ratio = _cached_ratio.load(butil::memory_order_relaxed);
95+
96+
if (now_us - last_us >= _update_interval_us) {
97+
if (_last_update_us.compare_exchange_strong(
98+
last_us, now_us, butil::memory_order_relaxed)) {
99+
int64_t total = _total_window.get_value();
100+
int64_t backup = _backup_window.get_value();
101+
ratio = (total > 0) ? static_cast<double>(backup) / total : 0.0;
102+
_cached_ratio.store(ratio, butil::memory_order_relaxed);
103+
}
104+
}
105+
106+
// max_backup_ratio >= 1.0 means no limit (ratio cannot exceed 1.0).
107+
return _max_backup_ratio >= 1.0 || ratio < _max_backup_ratio;
108+
}
109+
110+
void OnRPCEnd(const Controller* controller) {
111+
_total_count << 1;
112+
if (controller->has_backup_request()) {
113+
_backup_count << 1;
114+
}
115+
}
116+
117+
private:
118+
double _max_backup_ratio;
119+
int64_t _update_interval_us;
120+
121+
bvar::Adder<int64_t> _total_count;
122+
bvar::Adder<int64_t> _backup_count;
123+
bvar::Window<bvar::Adder<int64_t>> _total_window;
124+
bvar::Window<bvar::Adder<int64_t>> _backup_window;
125+
126+
mutable butil::atomic<double> _cached_ratio;
127+
mutable butil::atomic<int64_t> _last_update_us;
128+
};
129+
130+
// Internal BackupRequestPolicy that composes a BackupRateLimiter
131+
// for ratio-based suppression.
132+
class RateLimitedBackupPolicy : public BackupRequestPolicy {
133+
public:
134+
RateLimitedBackupPolicy(int32_t backup_request_ms,
135+
double max_backup_ratio,
136+
int window_size_seconds,
137+
int update_interval_seconds)
138+
: _backup_request_ms(backup_request_ms)
139+
, _rate_limiter(max_backup_ratio, window_size_seconds,
140+
update_interval_seconds) {
141+
}
142+
143+
int32_t GetBackupRequestMs(const Controller* /*controller*/) const override {
144+
return _backup_request_ms;
145+
}
146+
147+
bool DoBackup(const Controller* /*controller*/) const override {
148+
return _rate_limiter.ShouldAllow();
149+
}
150+
151+
void OnRPCEnd(const Controller* controller) override {
152+
_rate_limiter.OnRPCEnd(controller);
153+
}
154+
155+
private:
156+
int32_t _backup_request_ms;
157+
BackupRateLimiter _rate_limiter;
158+
};
159+
160+
// Builds a rate-limited BackupRequestPolicy after validating the arguments.
//
// backup_request_ms:       forwarded unchanged to the policy, which returns
//                          it from GetBackupRequestMs().
// max_backup_ratio:        must be in (0, 1]; NaN is rejected as well.
// window_size_seconds:     must be >= 1.
// update_interval_seconds: must be >= 1.
//
// Returns a heap-allocated policy owned by the caller, or NULL (after
// logging an error) when any argument is out of range.
BackupRequestPolicy* CreateRateLimitedBackupPolicy(
    int32_t backup_request_ms,
    double max_backup_ratio,
    int window_size_seconds,
    int update_interval_seconds) {
    // Written as a negated in-range test so that NaN, which compares false
    // against everything, fails validation. With `<= 0 || > 1.0` a NaN
    // slipped through and produced a policy whose limiter never allows a
    // backup request.
    if (!(max_backup_ratio > 0 && max_backup_ratio <= 1.0)) {
        LOG(ERROR) << "Invalid max_backup_ratio=" << max_backup_ratio
                   << ", must be in (0, 1]";
        return NULL;
    }
    if (window_size_seconds < 1) {
        LOG(ERROR) << "Invalid window_size_seconds=" << window_size_seconds
                   << ", must be >= 1";
        return NULL;
    }
    if (update_interval_seconds < 1) {
        LOG(ERROR) << "Invalid update_interval_seconds="
                   << update_interval_seconds << ", must be >= 1";
        return NULL;
    }
    return new RateLimitedBackupPolicy(
        backup_request_ms, max_backup_ratio,
        window_size_seconds, update_interval_seconds);
}
184+
185+
} // namespace brpc

src/brpc/backup_request_policy.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,20 @@ class BackupRequestPolicy {
3838
virtual void OnRPCEnd(const Controller* controller) = 0;
3939
};
4040

41+
// Create a BackupRequestPolicy that limits the ratio of backup requests
42+
// to total requests within a sliding time window. When the ratio reaches
43+
// or exceeds max_backup_ratio, DoBackup() returns false.
44+
// Returns NULL on invalid parameters.
45+
// backup_request_ms: returned unchanged by the policy's
//                    GetBackupRequestMs() for every RPC.
// max_backup_ratio: (0, 1]; a value of 1.0 never suppresses a backup.
46+
// window_size_seconds: >= 1; length of the sliding window.
47+
// update_interval_seconds: >= 1; minimum time between two refreshes of the
//                          cached ratio that DoBackup() consults.
48+
// The caller owns the returned pointer (allocated with new) and must
// delete it.
49+
BackupRequestPolicy* CreateRateLimitedBackupPolicy(
50+
int32_t backup_request_ms,
51+
double max_backup_ratio,
52+
int window_size_seconds,
53+
int update_interval_seconds);
54+
4155
}
4256

4357
#endif // BRPC_BACKUP_REQUEST_POLICY_H

src/brpc/channel.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ namespace brpc {
4343

4444
DECLARE_bool(enable_rpcz);
4545
DECLARE_bool(usercode_in_pthread);
46+
DECLARE_double(backup_request_max_ratio);
47+
DECLARE_int32(backup_request_ratio_window_size_s);
48+
DECLARE_int32(backup_request_ratio_update_interval_s);
4649
DEFINE_string(health_check_path, "", "Http path of health check call."
4750
"By default health check succeeds if the server is connectable."
4851
"If this flag is set, health check is not completed until a http "
@@ -63,6 +66,7 @@ ChannelOptions::ChannelOptions()
6366
, log_succeed_without_server(true)
6467
, socket_mode(SOCKET_MODE_TCP)
6568
, auth(NULL)
69+
, backup_request_max_ratio(-1)
6670
, backup_request_policy(NULL)
6771
, retry_policy(NULL)
6872
, ns_filter(NULL)
@@ -164,6 +168,7 @@ Channel::Channel(ProfilerLinker)
164168
, _serialize_request(NULL)
165169
, _pack_request(NULL)
166170
, _get_method_name(NULL)
171+
, _owns_backup_policy(false)
167172
, _preferred_index(-1) {
168173
}
169174

@@ -172,10 +177,20 @@ Channel::~Channel() {
172177
const ChannelSignature sig = ComputeChannelSignature(_options);
173178
SocketMapRemove(SocketMapKey(_server_address, sig));
174179
}
180+
if (_owns_backup_policy) {
181+
delete _options.backup_request_policy;
182+
}
175183
}
176184

177185

178186
int Channel::InitChannelOptions(const ChannelOptions* options) {
187+
// Clean up any previously created internal backup policy (re-Init case).
188+
if (_owns_backup_policy) {
189+
delete _options.backup_request_policy;
190+
_options.backup_request_policy = NULL;
191+
_owns_backup_policy = false;
192+
}
193+
179194
if (options) { // Override default options if user provided one.
180195
_options = *options;
181196
}
@@ -242,6 +257,35 @@ int Channel::InitChannelOptions(const ChannelOptions* options) {
242257
if (!cg.empty() && (::isspace(cg.front()) || ::isspace(cg.back()))) {
243258
butil::TrimWhitespace(cg, butil::TRIM_ALL, &cg);
244259
}
260+
261+
// Create rate-limited backup policy if configured.
262+
// User-provided backup_request_policy takes precedence.
263+
if (_options.backup_request_policy != NULL &&
264+
_options.backup_request_max_ratio > 0) {
265+
LOG(WARNING) << "backup_request_max_ratio is ignored because "
266+
"backup_request_policy is already set";
267+
}
268+
// Per-channel option takes precedence over the global gflag.
269+
double max_ratio = _options.backup_request_max_ratio;
270+
if (max_ratio < 0) {
271+
max_ratio = FLAGS_backup_request_max_ratio;
272+
}
273+
if (max_ratio > 1.0) {
274+
LOG(WARNING) << "backup_request_max_ratio=" << max_ratio
275+
<< " is out of range (0, 1], clamped to 1.0";
276+
max_ratio = 1.0;
277+
}
278+
if (max_ratio > 0 && _options.backup_request_policy == NULL &&
279+
_options.backup_request_ms >= 0) {
280+
BackupRequestPolicy* policy = CreateRateLimitedBackupPolicy(
281+
_options.backup_request_ms, max_ratio,
282+
FLAGS_backup_request_ratio_window_size_s,
283+
FLAGS_backup_request_ratio_update_interval_s);
284+
if (policy) {
285+
_options.backup_request_policy = policy;
286+
_owns_backup_policy = true;
287+
}
288+
}
245289
return 0;
246290
}
247291

src/brpc/channel.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,24 @@ struct ChannelOptions {
116116
// Default: NULL
117117
const Authenticator* auth;
118118

119+
// Maximum ratio of backup requests to total requests within a sliding
120+
// time window. When the ratio reaches or exceeds this value, backup
121+
// requests are suppressed. Value in (0, 1]. A negative value (e.g. -1)
// falls back to FLAGS_backup_request_max_ratio; 0 disables the limit.
122+
// Only effective when backup_request_ms >= 0 and backup_request_policy
123+
// is NULL (i.e. no custom policy). When effective, an internal
124+
// rate-limited BackupRequestPolicy is created and used automatically.
125+
// Default: -1 (defer to FLAGS_backup_request_max_ratio, whose own default
// of -1 means no limit)
126+
double backup_request_max_ratio;
127+
119128
// Customize the backup request time and whether to send backup request.
120-
// Priority: `backup_request_policy' > `backup_request_ms'.
121-
// Overridable by Controller.set_backup_request_ms() or
122-
// Controller.set_backup_request_policy().
123-
// This object is NOT owned by channel and should remain valid when channel is used.
129+
// Priority: `backup_request_policy' > `backup_request_max_ratio' > `backup_request_ms'.
130+
// Overridable per-RPC by Controller.set_backup_request_ms() or
131+
// Controller.set_backup_request_policy(). Note: per-RPC override
132+
// replaces the entire channel-level backup config including any
133+
// internal rate-limited policy created by backup_request_max_ratio.
134+
// When user-supplied, this object is NOT owned by channel and should
135+
// remain valid during channel's lifetime. When backup_request_max_ratio
136+
// creates an internal policy, that policy IS owned by channel.
124137
// Default: NULL
125138
BackupRequestPolicy* backup_request_policy;
126139

@@ -263,6 +276,7 @@ friend class SelectiveChannel;
263276
// the RPC above has finished
264277
butil::intrusive_ptr<SharedLoadBalancer> _lb;
265278
ChannelOptions _options;
279+
bool _owns_backup_policy;
266280
int _preferred_index;
267281
};
268282

0 commit comments

Comments
 (0)