Skip to content

Commit c5986f2

Browse files
authored
Add monitoring to devcontainer startup (#268)
Record devcontainer start and end times
1 parent ab72714 commit c5986f2

File tree

5 files changed

+153
-22
lines changed

5 files changed

+153
-22
lines changed

startupscript/butane/004-git-clone-devcontainer.sh

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,7 @@ if [[ $# -lt 1 ]]; then
1717
usage
1818
fi
1919

20-
21-
# Map the server to appropriate service path
22-
function get_service_url() {
23-
case "$1" in
24-
"dev-stable") echo "https://workbench-dev.verily.com/api/$2" ;;
25-
"dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/$2" ;;
26-
"test") echo "https://workbench-test.verily.com/api/$2" ;;
27-
"prod") echo "https://workbench.verily.com/api/$2" ;;
28-
*) return 1 ;;
29-
esac
30-
}
31-
readonly -f get_service_url
32-
20+
source /home/core/service-utils.sh
3321
source /home/core/metadata-utils.sh
3422

3523
# To accommodate the use of SSH URLs for public Git repositories, set the following Git configuration:
@@ -58,12 +46,8 @@ api_url="${api_url%.git}"
5846
private_status=$(curl --retry 5 -s "${api_url}" | jq -r ".status")
5947
if [[ "${PRIVATE_DEVCONTAINER_ENABLED}" == "TRUE" && "${private_status}" == 404 ]]; then
6048
# Get ECM service URL
61-
SERVER="$(get_metadata_value "terra-cli-server" "")"
62-
readonly SERVER
63-
if [[ -z "${SERVER}" ]]; then
64-
SERVER="prod"
65-
fi
66-
if ! ECM_SERVICE_URL="$(get_service_url "${SERVER}" "ecm")"; then
49+
SERVER="$(get_metadata_value "terra-cli-server" "prod")"
50+
if ! ECM_SERVICE_URL="$(get_service_url "ecm" "${SERVER}")"; then
6751
exit 1
6852
fi
6953

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
2+
# monitoring-utils.sh defines helper functions for notifying WSM of VM startup states.
3+
4+
# Log an event to WSM by POSTing a JSON payload to the resource's
# instance-state endpoint.
#
# Arguments:
#   $1 - WSM service base URL
#   $2 - workspace id
#   $3 - resource id
#   $4 - JSON payload to send as the request body
# Outputs:
#   A success message (including the response body) on stdout;
#   usage and failure messages on stderr.
# Returns:
#   0 when WSM answers with a 2xx status, 1 otherwise.
function log_event() {
  if [[ $# -lt 4 ]]; then
    echo "usage: log_event <wsm_url> <workspace_id> <resource_id> <payload>" >&2
    return 1
  fi

  # Input params
  local wsm_url="$1"
  local workspace_id="$2"
  local resource_id="$3"
  local payload="$4"
  local log_url="${wsm_url}/api/workspaces/v1/${workspace_id}/resources/${resource_id}/instance-state"

  # The calling script may run with xtrace enabled. Pause tracing while the
  # bearer token is in play so it is never written to the trace log.
  local restore_xtrace="false"
  if [[ "$-" == *x* ]]; then
    restore_xtrace="true"
    set +o xtrace
  fi

  # Fetch the token separately so a failure is detected instead of being
  # masked inside the curl argument list.
  local token=""
  local token_status=0
  local response=""
  token="$(/home/core/wb.sh auth print-access-token)" || token_status=$?
  if [[ "${token_status}" -eq 0 && -z "${token}" ]]; then
    token_status=1
  fi

  # Log VM event
  if [[ "${token_status}" -eq 0 ]]; then
    # On a transport error curl still prints the -w suffix (code 000); keep
    # going so the failure is reported below even when the caller uses errexit.
    response="$(curl -s -X POST "${log_url}" \
      -H "Authorization: Bearer ${token}" \
      -H "Content-Type: application/json" \
      -d "${payload}" \
      -w "\n%{http_code}")" || true
  fi
  token=""

  if [[ "${restore_xtrace}" == "true" ]]; then
    set -o xtrace
  fi

  if [[ "${token_status}" -ne 0 ]]; then
    echo "Failed to obtain an access token; VM state not recorded." >&2
    return 1
  fi

  # The status code is the last line of the output; everything before it is
  # the response body (empty when the output is a single line).
  local http_code="${response##*$'\n'}"
  local response_body=""
  if [[ "${response}" == *$'\n'* ]]; then
    response_body="${response%$'\n'*}"
  fi

  # Any 2xx status means WSM accepted the event.
  if [[ ! "${http_code}" =~ ^2[0-9][0-9]$ ]]; then
    echo "Failed to record VM state. HTTP ${http_code}: ${response_body}" >&2
    return 1
  fi

  echo "VM state recorded successfully: ${response_body}"
}
38+
39+
# Report the DEVCONTAINER_START event (always flagged as successful) to WSM.
#
# Arguments:
#   $1 - WSM service base URL
#   $2 - workspace id
#   $3 - resource id
# Returns:
#   1 on a usage error; otherwise the status of log_event.
function record_devcontainer_start() {
  if (( $# < 3 )); then
    echo "usage: record_devcontainer_start <wsm_url> <workspace_id> <resource_id>" >&2
    return 1
  fi

  # Build the JSON request body.
  local body
  body="$(printf '{\n"event": "DEVCONTAINER_START",\n"isSuccess": true\n}')"

  log_event "$1" "$2" "$3" "${body}"
}
56+
57+
# Report the DEVCONTAINER_END event to WSM with the given outcome.
#
# Arguments:
#   $1 - WSM service base URL
#   $2 - workspace id
#   $3 - resource id
#   $4 - outcome flag, literally "true" or "false"
# Returns:
#   1 on a usage error; otherwise the status of log_event.
function record_devcontainer_end() {
  local usage_text="usage: record_devcontainer_end <wsm_url> <workspace_id> <resource_id> <isSuccess - true/false>"

  if (( $# < 4 )); then
    echo "${usage_text}" >&2
    return 1
  fi

  # Only the two JSON boolean literals are accepted for the outcome flag.
  case "$4" in
    true | false) ;;
    *)
      echo "${usage_text}" >&2
      return 1
      ;;
  esac

  # Build the JSON request body; the flag is inserted unquoted as a boolean.
  local body
  body="$(printf '{\n"event": "DEVCONTAINER_END",\n"isSuccess": %s\n}' "$4")"

  log_event "$1" "$2" "$3" "${body}"
}
Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,36 @@
11
#!/bin/bash
22

3-
# pre-devcontainer.sh creates a file used by the devcontainer service to keep
4-
# track of the number of service failures.
3+
# pre-devcontainer.sh creates a file used by the devcontainer service for monitoring
4+
# and to keep track of the number of service failures.
55

6-
touch /tmp/devcontainer-failure-count
6+
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

# Keep track of the number of service failures
touch /tmp/devcontainer-failure-count

# Fetch and cache workspace_id
source /home/core/metadata-utils.sh
source /home/core/service-utils.sh
MONITORING_UTILS_FILE="/home/core/monitoring-utils.sh"
WORKSPACE_ID_CACHE_FILE="/tmp/workspace_id_cache"
FIRST_BOOT_START_FILE="/home/core/first-boot-start"
if [[ -f "${MONITORING_UTILS_FILE}" && ! -f "${FIRST_BOOT_START_FILE}" ]]; then
  # First boot file does not exist
  ## Cache workspace id to be used by probe-proxy-readiness.sh
  WORKSPACE_USER_FACING_ID="$(get_metadata_value "terra-workspace-id" "")"
  SERVER="$(get_metadata_value "terra-cli-server" "prod")"
  WSM_SERVICE_URL="$(get_service_url "wsm" "${SERVER}")"

  # Pause xtrace while the bearer token is handled so it never reaches the
  # trace log. Fetching the token as its own statement also lets errexit
  # catch a failing token command instead of masking it inside curl's args.
  set +o xtrace
  ACCESS_TOKEN="$(/home/core/wb.sh auth print-access-token)"
  RESPONSE=$(curl -s -X GET "${WSM_SERVICE_URL}/api/workspaces/v1/workspaceByUserFacingId/${WORKSPACE_USER_FACING_ID}" \
    -H "Authorization: Bearer ${ACCESS_TOKEN}")
  unset ACCESS_TOKEN
  set -o xtrace

  # "// empty" stops jq from printing the literal string "null" when the
  # response carries no id, so a bad lookup is never cached.
  WORKSPACE_ID="$(jq -r '.id // empty' <<<"${RESPONSE}")"
  if [[ -z "${WORKSPACE_ID}" ]]; then
    echo "Failed to resolve workspace id for '${WORKSPACE_USER_FACING_ID}': ${RESPONSE}" >&2
    exit 1
  fi
  echo "${WORKSPACE_ID}" > "${WORKSPACE_ID_CACHE_FILE}"

  ## Record devcontainer begin for monitoring
  source "${MONITORING_UTILS_FILE}"
  RESOURCE_ID="$(get_metadata_value "wb-resource-id" "")"
  # The recording helper sends a bearer token too; keep it out of the trace.
  set +o xtrace
  record_devcontainer_start "${WSM_SERVICE_URL}" "${WORKSPACE_ID}" "${RESOURCE_ID}"
  set -o xtrace
fi
touch "${FIRST_BOOT_START_FILE}"

startupscript/butane/probe-proxy-readiness.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,30 @@ if docker ps -q --filter "name=proxy-agent" | grep -q . \
1515
&& docker ps -q --filter "name=application-server" | grep -q .; then
1616
echo "Proxy is ready."
1717
status="$(get_guest_attribute "startup_script/status" "")"
18+
isSuccess="false"
1819
if [[ "${status}" != "ERROR" ]]; then
1920
set_metadata "startup_script/status" "COMPLETE"
21+
isSuccess="true"
2022
fi
23+
24+
FIRST_BOOT_END_FILE="/home/core/first-boot-end"
25+
MONITORING_UTILS_FILE="/home/core/monitoring-utils.sh"
26+
if [[ -f "${MONITORING_UTILS_FILE}" && ! -f "${FIRST_BOOT_END_FILE}" ]]; then
27+
# first boot file does not exist
28+
# record devcontainer end for monitoring
29+
source /home/core/service-utils.sh
30+
source "${MONITORING_UTILS_FILE}"
31+
32+
# Fetch required values
33+
WORKSPACE_ID_CACHE_FILE="/tmp/workspace_id_cache"
34+
WORKSPACE_ID=$(cat "${WORKSPACE_ID_CACHE_FILE}")
35+
RESOURCE_ID="$(get_metadata_value "wb-resource-id" "")"
36+
SERVER="$(get_metadata_value "terra-cli-server" "prod")"
37+
WSM_SERVICE_URL="$(get_service_url "wsm" "${SERVER}")"
38+
39+
record_devcontainer_end "${WSM_SERVICE_URL}" "${WORKSPACE_ID}" "${RESOURCE_ID}" "${isSuccess}"
40+
fi
41+
touch "${FIRST_BOOT_END_FILE}"
2142
else
2243
echo "proxy-agent or application-server is not started"
2344
exit 1
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
# service-utils defines helper functions for determining service path.
3+
4+
# Map the server to appropriate service path
5+
function get_service_url() {
6+
if [[ $# -lt 2 ]]; then
7+
echo "usage: get_service_url <service> <server>" >&2
8+
return 1
9+
fi
10+
11+
local SERVICE="$1"
12+
local SERVER="$2"
13+
14+
case "${SERVER}" in
15+
"dev-stable") echo "https://workbench-dev.verily.com/api/${SERVICE}" ;;
16+
"dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/${SERVICE}" ;;
17+
"test") echo "https://workbench-test.verily.com/api/${SERVICE}" ;;
18+
"prod") echo "https://workbench.verily.com/api/${SERVICE}" ;;
19+
*) return 1 ;;
20+
esac
21+
}
22+
readonly -f get_service_url

0 commit comments

Comments
 (0)