From db559d9a41256804d871b561c17167a4b307cd06 Mon Sep 17 00:00:00 2001
From: shuke
Date: Tue, 17 Mar 2026 14:55:55 +0800
Subject: [PATCH] [feature](build) Add build profiling support via DORIS_BUILD_PROFILE (#61410)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

- Add optional build profiling to `build.sh`, gated by `DORIS_BUILD_PROFILE` environment variable
- Records build metadata (user, base branch, commit, modified files, duration, exit code, load average) as JSON Lines to `.build_profile.jsonl`
- Auto-detects base branch (master/branch-x.y/branch-selectdb-doris-x.y) by finding the closest remote main branch
- Only tracks files modified since last build (mtime > last_build_time)
- Zero impact when `DORIS_BUILD_PROFILE` is not set — no behavior change for external users

## Deployment

Set in `custom_env.sh` on dev machines (no root required):
```bash
echo "export DORIS_BUILD_PROFILE=1" >> custom_env.sh
```
---
 .gitignore       |   2 +
 build.sh         |  15 ++++
 build_profile.sh | 216 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 233 insertions(+)
 create mode 100755 build_profile.sh

diff --git a/.gitignore b/.gitignore
index b515321338c74f..91c21130cd8bee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,8 @@ core.*
 nohup.out
 /custom_env.sh
 /custom_env_mac.sh
+/.build_profile.jsonl
+/.build_profile_state.*
 derby.log
 dependency-reduced-pom.xml
 yarn.lock
diff --git a/build.sh b/build.sh
index 6eb0d6bb1fdf67..a5e4a7e637e472 100755
--- a/build.sh
+++ b/build.sh
@@ -38,6 +38,17 @@ export TP_LIB_DIR="${DORIS_THIRDPARTY}/installed/lib"
 HADOOP_DEPS_NAME="hadoop-deps"
 . "${DORIS_HOME}/env.sh"
 
+# ===== Build Profile =====
+# Best-effort only: a profiler failure must never change the build's outcome.
+if [[ "${DORIS_BUILD_PROFILE}" == "1" ]]; then
+    _BP_STATE="${DORIS_HOME}/.build_profile_state.$$"
+    "${DORIS_HOME}/build_profile.sh" collect "${_BP_STATE}" "$*" || true
+    trap '"${DORIS_HOME}/build_profile.sh" record "${_BP_STATE}" 130 || true; exit 130' INT TERM
+    # Save $? first: once `record` has run, $? is record's status, not the build's.
+    trap '_bp_rc=$?; "${DORIS_HOME}/build_profile.sh" record "${_BP_STATE}" "${_bp_rc}" || true; exit "${_bp_rc}"' ERR
+fi
+# ===== End Build Profile =====
+
 # Check args
 usage() {
     echo "
@@ -1060,4 +1071,8 @@ if [[ -n "${DORIS_POST_BUILD_HOOK}" ]]; then
     eval "${DORIS_POST_BUILD_HOOK}"
 fi
 
+if [[ "${DORIS_BUILD_PROFILE}" == "1" ]]; then
+    "${DORIS_HOME}/build_profile.sh" record "${_BP_STATE}" 0 || true
+fi
+
 exit 0
diff --git a/build_profile.sh b/build_profile.sh
new file mode 100755
index 00000000000000..b3df313dca2f45
--- /dev/null
+++ b/build_profile.sh
@@ -0,0 +1,216 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##############################################################
+# Build profiling helper for build.sh
+# Usage:
+#   build_profile.sh collect <state_file> <args>
+#   build_profile.sh record  <state_file> <exit_code>
+#
+# Controlled by DORIS_BUILD_PROFILE=1 in custom_env.sh.
+# All errors are non-fatal — profiling failures never affect
+# the build itself.
+##############################################################
+
+set +e
+
+DORIS_HOME="${DORIS_HOME:-$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)}"
+LOG_FILE="${DORIS_HOME}/.build_profile.jsonl"
+
+# Print the remote main branch closest to HEAD (master, branch-x.y or
+# branch-selectdb-doris-x.y, without the "origin/" prefix): the candidate with
+# the fewest commits reachable from HEAD but not from that branch.
+# Prints "unknown" when no candidate can be evaluated.
+detect_base_branch() {
+    local min_count=999999 best="unknown"
+    local ref count
+    while read -r ref; do
+        count=$(git rev-list --count HEAD ^"${ref}" 2>/dev/null) || continue
+        if [[ "${count}" -lt "${min_count}" ]]; then
+            min_count="${count}"
+            best="${ref#origin/}"
+        fi
+    done < <(git branch -r 2>/dev/null |
+        grep -oE 'origin/(master|branch-[0-9.]+|branch-selectdb-doris-[0-9.]+)$')
+    echo "${best}"
+}
+
+# Print, one per line, the modified (git diff) and untracked files whose mtime
+# is newer than the given timestamp.
+#   $1 - start time of the previous build in epoch seconds (0 on first build)
+collect_files() {
+    local last_time="$1"
+    # GNU and BSD (macOS) stat spell "mtime in epoch seconds" differently; pick
+    # the variant once instead of running uname for every file.
+    local -a stat_mtime
+    if [[ "$(uname -s)" == "Darwin" ]]; then
+        stat_mtime=(stat -f %m)
+    else
+        stat_mtime=(stat -c %Y)
+    fi
+    local f mtime
+    while IFS= read -r f; do
+        [[ -z "${f}" || ! -f "${f}" ]] && continue
+        mtime=$("${stat_mtime[@]}" -- "${f}" 2>/dev/null) || continue
+        if [[ "${mtime}" -gt "${last_time}" ]]; then
+            printf '%s\n' "${f}"
+        fi
+    done < <(
+        git diff --name-only 2>/dev/null
+        git ls-files --others --exclude-standard 2>/dev/null
+    )
+}
+
+# Print the start_time of the newest record in the log, or 0 when there is no
+# log yet or its last line cannot be parsed.
+get_last_build_time() {
+    local last_time=0
+    if [[ -f "${LOG_FILE}" ]]; then
+        last_time=$(tail -n 1 "${LOG_FILE}" | python3 -c \
+            "import sys,json; print(json.load(sys.stdin).get('start_time',0))" 2>/dev/null || echo 0)
+    fi
+    # Only a plain integer is usable in the mtime comparison.
+    [[ "${last_time}" =~ ^[0-9]+$ ]] || last_time=0
+    echo "${last_time}"
+}
+
+# Snapshot build metadata into the state file before the build starts.
+#   $1 - state file to create
+#   $2 - build.sh arguments joined into one string
+# Returns 1, without writing the state file, when python3 is not available.
+cmd_collect() {
+    local state_file="$1"
+    local build_args="$2"
+
+    if ! command -v python3 &>/dev/null; then
+        echo "WARNING: python3 not found, build profiling disabled" >&2
+        return 1
+    fi
+
+    # The state file is line-oriented, so keep the arguments on a single line.
+    build_args="${build_args//$'\n'/ }"
+
+    local start_time user build_dir commit base_branch last_time files
+    start_time=$(date +%s)
+    user=$(whoami)
+    build_dir=$(pwd)
+    commit=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
+    base_branch=$(detect_base_branch)
+    last_time=$(get_last_build_time)
+    files=$(collect_files "${last_time}")
+
+    # KEY=value header, then a marker line, then one modified file per line.
+    {
+        printf '_BP_START=%s\n' "${start_time}"
+        printf '_BP_USER=%s\n' "${user}"
+        printf '_BP_DIR=%s\n' "${build_dir}"
+        printf '_BP_COMMIT=%s\n' "${commit}"
+        printf '_BP_BASE_BRANCH=%s\n' "${base_branch}"
+        printf '_BP_ARGS=%s\n' "${build_args}"
+        printf '%s\n' '===FILES===' "${files}"
+    } >"${state_file}"
+}
+
+# Merge the collected state with the build result, append it to the log as one
+# JSON line and remove the state file.
+#   $1 - state file written by "collect"
+#   $2 - exit code of the build
+# Returns 1 when the state file does not exist.
+cmd_record() {
+    local state_file="$1"
+    local exit_code="$2"
+
+    if [[ ! -f "${state_file}" ]]; then
+        echo "WARNING: build profile state file not found, skipping" >&2
+        return 1
+    fi
+
+    # Parse the KEY=value header up to the ===FILES=== marker; everything after
+    # the marker is the file list. Values are assigned with printf -v instead
+    # of eval, so quotes or shell syntax in build arguments and paths are kept
+    # literally and never executed.
+    local _BP_START _BP_USER _BP_DIR _BP_COMMIT _BP_BASE_BRANCH _BP_ARGS
+    local line key files="" in_files=0
+    while IFS= read -r line; do
+        if [[ "${in_files}" -eq 1 ]]; then
+            files+="${line}"$'\n'
+        elif [[ "${line}" == "===FILES===" ]]; then
+            in_files=1
+        else
+            key="${line%%=*}"
+            case "${key}" in
+                _BP_START | _BP_USER | _BP_DIR | _BP_COMMIT | _BP_BASE_BRANCH | _BP_ARGS)
+                    printf -v "${key}" '%s' "${line#*=}"
+                    ;;
+            esac
+        fi
+    done <"${state_file}"
+
+    local end_time load_avg
+    end_time=$(date +%s)
+    load_avg=$(uptime | grep -oE 'load average[s]?: .*' | sed 's/load average[s]\{0,1\}: //')
+
+    # Write record via python3 (env vars + stdin for safety)
+    printf '%s' "${files}" |
+        _BP_USER="${_BP_USER}" \
+        _BP_DIR="${_BP_DIR}" \
+        _BP_BASE_BRANCH="${_BP_BASE_BRANCH}" \
+        _BP_COMMIT="${_BP_COMMIT}" \
+        _BP_ARGS="${_BP_ARGS}" \
+        _BP_START="${_BP_START}" \
+        _BP_EXIT_CODE="${exit_code}" \
+        _BP_END_TIME="${end_time}" \
+        _BP_LOAD_AVG="${load_avg}" \
+        python3 -c "
+import json, os, sys
+
+files = [line.strip() for line in sys.stdin if line.strip()]
+start = int(os.environ['_BP_START'])
+end = int(os.environ['_BP_END_TIME'])
+record = {
+    'user': os.environ['_BP_USER'],
+    'build_dir': os.environ['_BP_DIR'],
+    'base_branch': os.environ['_BP_BASE_BRANCH'],
+    'commit': os.environ['_BP_COMMIT'],
+    'args': os.environ.get('_BP_ARGS', ''),
+    'files': files,
+    'start_time': start,
+    'end_time': end,
+    'duration_sec': end - start,
+    'exit_code': int(os.environ['_BP_EXIT_CODE']),
+    'load_avg': os.environ['_BP_LOAD_AVG'],
+}
+print(json.dumps(record))
+" >>"${LOG_FILE}"
+
+    # Clean up state file
+    rm -f -- "${state_file}"
+}
+
+case "$1" in
+    collect)
+        cmd_collect "$2" "$3"
+        ;;
+    record)
+        cmd_record "$2" "$3"
+        ;;
+    *)
+        echo "Usage: $0 {collect|record} <state_file> <args|exit_code>" >&2
+        exit 1
+        ;;
+esac