Skip to content

Commit cb9bad7

Browse files
committed
fix exponential backoff overflow
The `backoff` function's wait-time calculation overflows `time.Duration` on the 55th retry (after approximately 6 hours). The overflow results in zero wait times, which leads to the uncontrolled spawning of hundreds of goroutines and can cause memory exhaustion and an OOM kill on Linux.
1 parent 1c21c37 commit cb9bad7

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

services/http.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package services
22

33
import (
44
"fmt"
5-
"math"
65
"net/http"
76
"strconv"
87
"time"
@@ -19,7 +18,7 @@ const (
1918
// sender is a helper for sending post requests. If the request fails, sender calculates an
2019
// exponential backoff time using retryWaitSeconds and returns it as the sleep time.
2120
type sender struct {
22-
failCount int
21+
nextWait time.Duration
2322
}
2423

2524
// post posts data to the specified URL and returns the response, the sleep time in seconds, and any
@@ -38,7 +37,7 @@ func (s *sender) post(req *http.Request, httpClient *http.Client) (*http.Respons
3837
return resp, s.backoff(), err
3938
}
4039

41-
s.failCount = 0
40+
s.nextWait = retryWaitSeconds
4241

4342
var sleepTime int64
4443
if sleepVal := resp.Header.Get(common.SleepHeader); sleepVal != "" {
@@ -66,11 +65,11 @@ func (s *sender) doPost(req *http.Request, httpClient *http.Client) (*http.Respo
6665

6766
// backoff calculates the backoff time in seconds for the next retry.
6867
func (s *sender) backoff() int64 {
69-
wait := time.Duration(math.Pow(2, float64(s.failCount))) * retryWaitSeconds
70-
s.failCount++
68+
wait := s.nextWait
7169

72-
if wait > maxRetryWait {
73-
return int64(maxRetryWait.Seconds())
70+
s.nextWait *= 2
71+
if s.nextWait > maxRetryWait {
72+
s.nextWait = maxRetryWait
7473
}
7574

7675
return int64(wait.Seconds())

0 commit comments

Comments
 (0)