From 54fb175a9bc83f32deeb25071fe3b0f0b8594375 Mon Sep 17 00:00:00 2001 From: Asish Kumar Date: Fri, 24 Apr 2026 02:14:09 +0530 Subject: [PATCH] Fix HA connection metric for failed tunnel dials Count active HA connections only after a tunnel connection has successfully registered with the edge. Failed dial or registration attempts now leave cloudflared_tunnel_ha_connections at zero, matching the metric description and avoiding misleading readiness data. Signed-off-by: Asish Kumar --- supervisor/fuse.go | 4 +++- supervisor/tunnel.go | 12 ++++++++---- supervisor/tunnel_test.go | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/supervisor/fuse.go b/supervisor/fuse.go index 3a143437402..2aec62cd832 100644 --- a/supervisor/fuse.go +++ b/supervisor/fuse.go @@ -26,7 +26,7 @@ func (f *booleanFuse) Value() bool { return f.value == 1 } -func (f *booleanFuse) Fuse(result bool) { +func (f *booleanFuse) Fuse(result bool) bool { f.mu.Lock() defer f.mu.Unlock() newValue := int32(2) @@ -36,7 +36,9 @@ func (f *booleanFuse) Fuse(result bool) { if f.value == 0 { f.value = newValue f.cond.Broadcast() + return true } + return false } // Await blocks until Fuse has been called at least once. diff --git a/supervisor/tunnel.go b/supervisor/tunnel.go index 97013f5a0b2..e86452f8e75 100644 --- a/supervisor/tunnel.go +++ b/supervisor/tunnel.go @@ -186,10 +186,12 @@ type TunnelServer interface { } func (e *EdgeTunnelServer) Serve(ctx context.Context, connIndex uint8, protocolFallback *protocolFallback, connectedSignal *signal.Signal) error { - haConnections.Inc() - defer haConnections.Dec() - connectedFuse := newBooleanFuse() + defer func() { + if connectedFuse.Value() { + haConnections.Dec() + } + }() go func() { if connectedFuse.Await() { connectedSignal.Notify() @@ -702,7 +704,9 @@ type connectedFuse struct { } func (cf *connectedFuse) Connected() { - cf.fuse.Fuse(true) + if cf.fuse.Fuse(true) { + haConnections.Inc() + } cf.backoff.reset() } diff --git a/supervisor/tunnel_test.go b/supervisor/tunnel_test.go index b0b6cef5b08..50bda1867dd 100644 --- a/supervisor/tunnel_test.go +++ b/supervisor/tunnel_test.go @@ -4,9 +4,11 @@ import ( "testing" "time" + dto "github.com/prometheus/client_model/go" "github.com/quic-go/quic-go" "github.com/rs/zerolog" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/cloudflare/cloudflared/connection" "github.com/cloudflare/cloudflared/edgediscovery" @@ -18,6 +20,40 @@ type dynamicMockFetcher struct { err error } +func TestConnectedFuseUpdatesHAConnectionsOnce(t *testing.T) { + fuse := newBooleanFuse() + connectedFuse := &connectedFuse{ + fuse: fuse, + backoff: &protocolFallback{ + BackoffHandler: retry.NewBackoff(3, time.Millisecond, false), + }, + } + initial := haConnectionsValue(t) + defer haConnections.Set(initial) + + connectedFuse.Connected() + assert.Equal(t, initial+1, haConnectionsValue(t)) + + connectedFuse.Connected() + assert.Equal(t, initial+1, haConnectionsValue(t)) +} + +func TestUnconnectedFuseDoesNotUpdateHAConnections(t *testing.T) { + initial := haConnectionsValue(t) + defer haConnections.Set(initial) + + fuse := newBooleanFuse() + fuse.Fuse(false) + + assert.Equal(t, initial, haConnectionsValue(t)) +} + +func haConnectionsValue(t *testing.T) float64 { + var metric dto.Metric + require.NoError(t, haConnections.Write(&metric)) + return metric.GetGauge().GetValue() +} + func (dmf *dynamicMockFetcher) fetch() edgediscovery.PercentageFetcher { return func() (edgediscovery.ProtocolPercents, error) { return dmf.protocolPercents, dmf.err