From df4ba439707c6bc7e0e6428f35c7c0ed2098409a Mon Sep 17 00:00:00 2001 From: Mukunda Rao Katta Date: Sun, 26 Apr 2026 14:53:42 -0700 Subject: [PATCH 1/2] fix(ci): wait for tracecontext server readiness instead of fixed sleep Replace the racy 'sleep 1' before the W3C tracecontext tests with an active readiness probe against 127.0.0.1:5000. The fixed sleep was too short on slow CI runners and produced intermittent connection errors against the Flask example server. Closes #5104 --- scripts/tracecontext-integration-test.sh | 32 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/scripts/tracecontext-integration-test.sh b/scripts/tracecontext-integration-test.sh index 1195e7facfc..009a840ebfe 100755 --- a/scripts/tracecontext-integration-test.sh +++ b/scripts/tracecontext-integration-test.sh @@ -7,15 +7,11 @@ mkdir -p target rm -rf ./target/trace-context git clone https://github.com/w3c/trace-context ./target/trace-context cd ./target/trace-context && git checkout $TRACECONTEXT_GIT_TAG && cd - -# start example opentelemetry service, which propagates trace-context by +# start example opentelemetry service, which propagates trace-context by # default. python ./tests/w3c_tracecontext_validation_server.py 1>&2 & EXAMPLE_SERVER_PID=$! -# give the app server a little time to start up. Not adding some sort -# of delay would cause many of the tracecontext tests to fail being -# unable to connect. -sleep 1 -onshutdown() +onshutdown() { # send a sigint, to ensure # it is caught as a KeyboardInterrupt in the @@ -23,6 +19,30 @@ onshutdown() kill $EXAMPLE_SERVER_PID } trap onshutdown EXIT +# Wait for the example server to accept connections on 127.0.0.1:5000 +# before running the W3C tracecontext tests. A fixed `sleep` raced the +# Flask startup on slow CI runners and produced intermittent connection +# errors (see issue #5104). +wait_for_server() { + host=127.0.0.1 + port=5000 + deadline=$(( $(date +%s) + 30 )) + while [ "$(date +%s)" -lt "$deadline" ]; do + # Bail out early if the server process died. + if ! kill -0 "$EXAMPLE_SERVER_PID" 2>/dev/null; then + echo "tracecontext example server exited before becoming ready" >&2 + return 1 + fi + # Use python so we don't depend on extra tools (nc, curl, etc.). + if python -c "import socket,sys; s=socket.socket(); s.settimeout(1); sys.exit(0 if s.connect_ex(('$host', $port)) == 0 else 1)" 2>/dev/null; then + return 0 + fi + sleep 0.5 + done + echo "tracecontext example server did not become ready within 30s" >&2 + return 1 +} +wait_for_server cd ./target/trace-context/test # The disabled test is not compatible with an optional part of the W3C From 0afbf06b031cc97f295d7932d12a497dd85c1a33 Mon Sep 17 00:00:00 2001 From: Mukunda Katta Date: Mon, 27 Apr 2026 22:25:12 -0700 Subject: [PATCH 2/2] docs(changelog): note ci tracecontext readiness fix (#5149) --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 614f240d4ee..941551d8a99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#5120](https://github.com/open-telemetry/opentelemetry-python/pull/5120)) - Add WeaverLiveCheck test util ([#5088](https://github.com/open-telemetry/opentelemetry-python/pull/5088)) +- ci: wait for tracecontext server readiness instead of a fixed sleep in `scripts/tracecontext-integration-test.sh` + ([#5149](https://github.com/open-telemetry/opentelemetry-python/pull/5149)) ## Version 1.41.0/0.62b0 (2026-04-09)