diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e24bdca5251..4c6e02d4dc2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,6 +26,7 @@ jobs: unit-test: if: github.repository_owner == 'getsentry' runs-on: ${{ matrix.os }} + timeout-minutes: 30 strategy: matrix: os: [ubuntu-24.04, ubuntu-24.04-arm] @@ -40,6 +41,7 @@ jobs: integration-test: if: github.repository_owner == 'getsentry' runs-on: ${{ matrix.os }} + timeout-minutes: 30 strategy: fail-fast: false matrix: diff --git a/action.yaml b/action.yaml index 601aaa13099..5846f1f4f28 100644 --- a/action.yaml +++ b/action.yaml @@ -264,3 +264,6 @@ runs: echo "::group::Inspect failure - docker compose logs" docker compose logs echo "::endgroup::" + echo "::group::Inspect failure - docker stats" + docker stats --no-stream + echo "::endgroup::" diff --git a/docker-compose.yml b/docker-compose.yml index cdf01563a5c..9f4c92a84ca 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -201,8 +201,8 @@ services: KAFKA_TOOLS_LOG4J_LOGLEVEL: "WARN" ulimits: nofile: - soft: 4096 - hard: 4096 + soft: 100000 + hard: 100000 volumes: - "sentry-kafka:/var/lib/kafka/data" - "sentry-kafka-log:/var/lib/kafka/log" diff --git a/sentry/sentry.conf.example.py b/sentry/sentry.conf.example.py index 3bdc3e1a19d..b901986ef9a 100644 --- a/sentry/sentry.conf.example.py +++ b/sentry/sentry.conf.example.py @@ -221,7 +221,17 @@ def get_internal_network(): DEFAULT_KAFKA_OPTIONS = { "bootstrap.servers": "kafka:9092", "message.max.bytes": 50000000, - "socket.timeout.ms": 1000, + "socket.timeout.ms": 10000, # Timeout for individual socket operations (send/recv) + "request.timeout.ms": 30000, # Max time to wait for a broker response before failing + "retries": 5, # Number of retries for transient/retriable request failures + "retry.backoff.ms": 1000, # Wait time between retry attempts + "reconnect.backoff.ms": 1000, # Initial wait before reconnecting after a lost connection + 
"reconnect.backoff.max.ms": 10000, # Upper bound for exponential backoff on reconnect attempts + # Session & heartbeat — must satisfy: + # heartbeat.interval.ms < session.timeout.ms < max.poll.interval.ms + "session.timeout.ms": 60000, # Grace period before broker evicts an unresponsive consumer (default: 45s) + "heartbeat.interval.ms": 20000, # How often the consumer sends a heartbeat — should be no more than 1/3 of session.timeout.ms + "max.poll.interval.ms": 600000, # Max allowed time between poll() calls before the consumer is considered dead } SENTRY_EVENTSTREAM = "sentry.eventstream.kafka.KafkaEventStream"