From 758981e9a86816bc2018fa93eb5d4f29d0583679 Mon Sep 17 00:00:00 2001 From: june-hua Date: Wed, 12 Nov 2025 16:37:46 -0500 Subject: [PATCH 1/2] Add monitoring to startup --- .../butane/004-git-clone-devcontainer.sh | 21 +------ startupscript/butane/monitoring-utils.sh | 63 +++++++++++++++++++ startupscript/butane/pre-devcontainer.sh | 16 ++++- startupscript/butane/probe-proxy-readiness.sh | 12 ++++ startupscript/butane/service-utils.sh | 21 +++++++ 5 files changed, 111 insertions(+), 22 deletions(-) create mode 100644 startupscript/butane/monitoring-utils.sh create mode 100644 startupscript/butane/service-utils.sh diff --git a/startupscript/butane/004-git-clone-devcontainer.sh b/startupscript/butane/004-git-clone-devcontainer.sh index 429c8e62..87eddc8a 100755 --- a/startupscript/butane/004-git-clone-devcontainer.sh +++ b/startupscript/butane/004-git-clone-devcontainer.sh @@ -17,19 +17,7 @@ if [[ $# -lt 1 ]]; then usage fi - -# Map the server to appropriate service path -function get_service_url() { - case "$1" in - "dev-stable") echo "https://workbench-dev.verily.com/api/$2" ;; - "dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/$2" ;; - "test") echo "https://workbench-test.verily.com/api/$2" ;; - "prod") echo "https://workbench.verily.com/api/$2" ;; - *) return 1 ;; - esac -} -readonly -f get_service_url - +source /home/core/service-utils.sh source /home/core/metadata-utils.sh # To accommodate the use of SSH URLs for public Git repositories, set the following Git configuration: @@ -58,12 +46,7 @@ api_url="${api_url%.git}" private_status=$(curl --retry 5 -s "${api_url}" | jq -r ".status") if [[ "${PRIVATE_DEVCONTAINER_ENABLED}" == "TRUE" && "${private_status}" == 404 ]]; then # Get ECM service URL - SERVER="$(get_metadata_value "terra-cli-server" "")" - readonly SERVER - if [[ -z "${SERVER}" ]]; then - SERVER="prod" - fi - if ! ECM_SERVICE_URL="$(get_service_url "${SERVER}" "ecm")"; then + if ! ECM_SERVICE_URL="$(get_service_url "ecm")"; then exit 1 fi diff --git a/startupscript/butane/monitoring-utils.sh b/startupscript/butane/monitoring-utils.sh new file mode 100644 index 00000000..474e7a56 --- /dev/null +++ b/startupscript/butane/monitoring-utils.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# monitoring-utils.sh defines helper functions for notifying WSM of VM startup states. + +source /home/core/service-utils.sh + +WORKSPACE_ID="$(get_metadata_value "terra-workspace-id" "")" +RESOURCE_ID="$(get_metadata_value "terra-resource-id" "")" +readonly WORKSPACE_ID RESOURCE_ID + +# Get WSM endpoint URL +if ! WSM_SERVICE_URL="$(get_service_url "wsm")"; then + exit 1 +fi +LOG_URL="${WSM_SERVICE_URL}/api/workspaces/${WORKSPACE_ID}/resource/${RESOURCE_ID}/instance-state" + +function record_devcontainer_end() { + if [[ $# -lt 2 || ("$2" != "0" && "$2" != "1") ]]; then + echo "usage: record_devcontainer_end " + exit 1 + fi + SUCCESS="$1" + payload=$(cat <&2 + return 1 + fi + echo "VM state recorded successfully: ${response_body}" +} + +function record_devcontainer_start() { + payload=$(cat <&2 + return 1 + fi + echo "VM state recorded successfully: ${response_body}" +} diff --git a/startupscript/butane/pre-devcontainer.sh b/startupscript/butane/pre-devcontainer.sh index 1d58a053..a38d07e2 100644 --- a/startupscript/butane/pre-devcontainer.sh +++ b/startupscript/butane/pre-devcontainer.sh @@ -1,6 +1,16 @@ #!/bin/bash -# pre-devcontainer.sh creates a file used by the devcontainer service to keep -# track of the number of service failures. +# pre-devcontainer.sh creates a file used by the devcontainer service for monitoring +# and to keep track of the number of service failures. -touch /tmp/devcontainer-failure-count \ No newline at end of file +touch /tmp/devcontainer-failure-count + +MONITORING_UTILS_FILE="/home/core/monitoring-utils.sh" +FIRST_BOOT_START_FILE="/home/core/first-boot-start" +if [[ -f "${MONITORING_UTILS_FILE}" && ! -f "${FIRST_BOOT_START_FILE}" ]]; then + # First boot file does not exist + # Record startup begin for monitoring + source "${MONITORING_UTILS_FILE}" + record_devcontainer_start "${CLOUD_PLATFORM}" +fi +touch "${FIRST_BOOT_START_FILE}" \ No newline at end of file diff --git a/startupscript/butane/probe-proxy-readiness.sh b/startupscript/butane/probe-proxy-readiness.sh index d36a1670..1c1f6ae6 100644 --- a/startupscript/butane/probe-proxy-readiness.sh +++ b/startupscript/butane/probe-proxy-readiness.sh @@ -15,9 +15,21 @@ if docker ps -q --filter "name=proxy-agent" | grep -q . \ && docker ps -q --filter "name=application-server" | grep -q .; then echo "Proxy is ready." status="$(get_guest_attribute "startup_script/status" "")" + success=0 if [[ "${status}" != "ERROR" ]]; then set_metadata "startup_script/status" "COMPLETE" + success=1 fi + + FIRST_BOOT_END_FILE="/home/core/first-boot-end" + MONITORING_UTILS_FILE="/home/core/monitoring-utils.sh" + if [[ -f "${MONITORING_UTILS_FILE}" && ! -f "${FIRST_BOOT_END_FILE}" ]]; then + # first boot file does not exist + # record startup end for monitoring + source "${MONITORING_UTILS_FILE}" + record_devcontainer_end "${success}" + fi + touch "${FIRST_BOOT_END_FILE}" else echo "proxy-agent or application-server is not started" exit 1 diff --git a/startupscript/butane/service-utils.sh b/startupscript/butane/service-utils.sh new file mode 100644 index 00000000..8fee3994 --- /dev/null +++ b/startupscript/butane/service-utils.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# service-utils defines helper functions for determining service path. + +# Map the server to appropriate service path +function get_service_url() { + SERVER="$(get_metadata_value "terra-cli-server" "")" + if [[ -z "${SERVER}" ]]; then + SERVER="dev-stable" + fi + readonly SERVER + + + case "${SERVER}" in + "dev-stable") echo "https://workbench-dev.verily.com/api/${SERVER}" ;; + "dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/${SERVER}" ;; + "test") echo "https://workbench-test.verily.com/api/${SERVER}" ;; + "prod") echo "https://workbench.verily.com/api/${SERVER}" ;; + *) return 1 ;; + esac +} +readonly -f get_service_url \ No newline at end of file From aa7d6b152b5445f2c87451cbe86c2c005792f258 Mon Sep 17 00:00:00 2001 From: june-hua Date: Thu, 20 Nov 2025 22:31:38 -0500 Subject: [PATCH 2/2] fixes 1. decouple util files 2. fetch workspace_id from user_facing_id --- .../butane/004-git-clone-devcontainer.sh | 3 +- startupscript/butane/monitoring-utils.sh | 87 +++++++++++-------- startupscript/butane/pre-devcontainer.sh | 26 +++++- startupscript/butane/probe-proxy-readiness.sh | 17 +++- startupscript/butane/service-utils.sh | 19 ++-- 5 files changed, 97 insertions(+), 55 deletions(-) diff --git a/startupscript/butane/004-git-clone-devcontainer.sh b/startupscript/butane/004-git-clone-devcontainer.sh index 87eddc8a..92d6646d 100755 --- a/startupscript/butane/004-git-clone-devcontainer.sh +++ b/startupscript/butane/004-git-clone-devcontainer.sh @@ -46,7 +46,8 @@ api_url="${api_url%.git}" private_status=$(curl --retry 5 -s "${api_url}" | jq -r ".status") if [[ "${PRIVATE_DEVCONTAINER_ENABLED}" == "TRUE" && "${private_status}" == 404 ]]; then # Get ECM service URL - if ! ECM_SERVICE_URL="$(get_service_url "ecm")"; then + SERVER="$(get_metadata_value "terra-cli-server" "prod")" + if ! ECM_SERVICE_URL="$(get_service_url "ecm" "${SERVER}")"; then exit 1 fi diff --git a/startupscript/butane/monitoring-utils.sh b/startupscript/butane/monitoring-utils.sh index 474e7a56..97a911f4 100644 --- a/startupscript/butane/monitoring-utils.sh +++ b/startupscript/butane/monitoring-utils.sh @@ -1,63 +1,74 @@ #!/bin/bash # monitoring-utils.sh defines helper functions for notifying WSM of VM startup states. -source /home/core/service-utils.sh - -WORKSPACE_ID="$(get_metadata_value "terra-workspace-id" "")" -RESOURCE_ID="$(get_metadata_value "terra-resource-id" "")" -readonly WORKSPACE_ID RESOURCE_ID +# Log an event to WSM +function log_event() { + if [[ $# -lt 4 ]]; then + echo "usage: log_event " >&2 + return 1 + fi -# Get WSM endpoint URL -if ! WSM_SERVICE_URL="$(get_service_url "wsm")"; then - exit 1 -fi -LOG_URL="${WSM_SERVICE_URL}/api/workspaces/${WORKSPACE_ID}/resource/${RESOURCE_ID}/instance-state" + # Input params + local wsm_url="$1" + local workspace_id="$2" + local resource_id="$3" + local payload="$4" + local log_url="${wsm_url}/api/workspaces/v1/${workspace_id}/resources/${resource_id}/instance-state" -function record_devcontainer_end() { - if [[ $# -lt 2 || ("$2" != "0" && "$2" != "1") ]]; then - echo "usage: record_devcontainer_end " - exit 1 - fi - SUCCESS="$1" - payload=$(cat <&2 return 1 fi + echo "VM state recorded successfully: ${response_body}" } +# Record devcontainer start event function record_devcontainer_start() { + if [[ $# -lt 3 ]]; then + echo "usage: record_devcontainer_start " >&2 + return 1 + fi + + local payload payload=$(cat <&2 + log_event "$1" "$2" "$3" "$payload" +} + +# Record devcontainer end event +function record_devcontainer_end() { + if [[ $# -lt 4 || ("$4" != "true" && "$4" != "false") ]]; then + echo "usage: record_devcontainer_end " >&2 return 1 fi - echo "VM state recorded successfully: ${response_body}" + + local success="$4" + local payload + payload=$(cat < "${WORKSPACE_ID_CACHE_FILE}" + + ## Record devcontainer begin for monitoring source "${MONITORING_UTILS_FILE}" - record_devcontainer_start "${CLOUD_PLATFORM}" + RESOURCE_ID="$(get_metadata_value "wb-resource-id" "")" + record_devcontainer_start "${WSM_SERVICE_URL}" "${WORKSPACE_ID}" "${RESOURCE_ID}" fi -touch "${FIRST_BOOT_START_FILE}" \ No newline at end of file +touch "${FIRST_BOOT_START_FILE}" diff --git a/startupscript/butane/probe-proxy-readiness.sh b/startupscript/butane/probe-proxy-readiness.sh index 1c1f6ae6..62984a6d 100644 --- a/startupscript/butane/probe-proxy-readiness.sh +++ b/startupscript/butane/probe-proxy-readiness.sh @@ -15,19 +15,28 @@ if docker ps -q --filter "name=proxy-agent" | grep -q . \ && docker ps -q --filter "name=application-server" | grep -q .; then echo "Proxy is ready." status="$(get_guest_attribute "startup_script/status" "")" - success=0 + isSuccess="false" if [[ "${status}" != "ERROR" ]]; then set_metadata "startup_script/status" "COMPLETE" - success=1 + isSuccess="true" fi FIRST_BOOT_END_FILE="/home/core/first-boot-end" MONITORING_UTILS_FILE="/home/core/monitoring-utils.sh" if [[ -f "${MONITORING_UTILS_FILE}" && ! -f "${FIRST_BOOT_END_FILE}" ]]; then # first boot file does not exist - # record startup end for monitoring + # record devcontainer end for monitoring + source /home/core/service-utils.sh source "${MONITORING_UTILS_FILE}" - record_devcontainer_end "${success}" + + # Fetch required values + WORKSPACE_ID_CACHE_FILE="/tmp/workspace_id_cache" + WORKSPACE_ID=$(cat "${WORKSPACE_ID_CACHE_FILE}") + RESOURCE_ID="$(get_metadata_value "wb-resource-id" "")" + SERVER="$(get_metadata_value "terra-cli-server" "prod")" + WSM_SERVICE_URL="$(get_service_url "wsm" "${SERVER}")" + + record_devcontainer_end "${WSM_SERVICE_URL}" "${WORKSPACE_ID}" "${RESOURCE_ID}" "${isSuccess}" fi touch "${FIRST_BOOT_END_FILE}" else diff --git a/startupscript/butane/service-utils.sh b/startupscript/butane/service-utils.sh index 8fee3994..aed00026 100644 --- a/startupscript/butane/service-utils.sh +++ b/startupscript/butane/service-utils.sh @@ -3,19 +3,20 @@ # Map the server to appropriate service path function get_service_url() { - SERVER="$(get_metadata_value "terra-cli-server" "")" - if [[ -z "${SERVER}" ]]; then - SERVER="dev-stable" + if [[ $# -lt 2 ]]; then + echo "usage: get_service_url " >&2 + return 1 fi - readonly SERVER + local SERVICE="$1" + local SERVER="$2" case "${SERVER}" in - "dev-stable") echo "https://workbench-dev.verily.com/api/${SERVER}" ;; - "dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/${SERVER}" ;; - "test") echo "https://workbench-test.verily.com/api/${SERVER}" ;; - "prod") echo "https://workbench.verily.com/api/${SERVER}" ;; + "dev-stable") echo "https://workbench-dev.verily.com/api/${SERVICE}" ;; + "dev-unstable") echo "https://workbench-dev-unstable.verily.com/api/${SERVICE}" ;; + "test") echo "https://workbench-test.verily.com/api/${SERVICE}" ;; + "prod") echo "https://workbench.verily.com/api/${SERVICE}" ;; *) return 1 ;; esac } -readonly -f get_service_url \ No newline at end of file +readonly -f get_service_url