Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 134 additions & 46 deletions .github/workflows/e2e-matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ jobs:
MIN_MEM_GI_PER_NODE=12
MIN_CPU_PER_NODE=4
MIN_NODES_FOR_PLACEMENT=3
POWER_OFF_POLL_INTERVAL_SEC=10
POWER_OFF_WAIT_TIMEOUT_SEC=180

# Helpers: Kubernetes quantity -> numeric (portable, no bash 4+)
mem_to_gi() {
Expand Down Expand Up @@ -155,14 +157,17 @@ jobs:
# Gather free resources like the scheduler: allocatable - sum(pod requests) per node.
worker_nodes=$(kubectl get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[*].metadata.name}')
gather_node_resources() {
available_mem_gi=0
available_cpu=0
nodes_meeting_min=0
local available_mem_gi=0
local available_cpu=0
local nodes_meeting_min=0
local node node_json alloc_mem_gi alloc_cpu pods_json requested_mem_gi requested_cpu
local node_free_mem node_free_cpu node_ok_mem node_ok_cpu

for node in $worker_nodes; do
[[ -n "$node" ]] || continue
node_json=$(kubectl get node "$node" -o json 2>/dev/null) || true
if [[ -z "$node_json" ]]; then
echo "[WARN] Node $node: could not get node spec, skipping"
echo "[WARN] Node $node: could not get node spec, skipping" >&2
continue
fi

Expand Down Expand Up @@ -205,29 +210,33 @@ jobs:
if [[ "$node_ok_mem" -eq 1 && "$node_ok_cpu" -eq 1 ]]; then
nodes_meeting_min=$((nodes_meeting_min + 1))
else
echo "[INFO] Node $node: does not meet placement min — free ${node_free_mem} Gi RAM, ${node_free_cpu} CPU (required: >= ${MIN_MEM_GI_PER_NODE} Gi, >= ${MIN_CPU_PER_NODE} CPU)"
echo "[INFO] Node $node: does not meet placement min — free ${node_free_mem} Gi RAM, ${node_free_cpu} CPU (required: >= ${MIN_MEM_GI_PER_NODE} Gi, >= ${MIN_CPU_PER_NODE} CPU)" >&2
fi
done

printf '%s\t%s\t%s\n' "$available_mem_gi" "$available_cpu" "$nodes_meeting_min"
}

gather_node_resources
echo "[INFO] Workers: free ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min} (need at least ${MIN_NODES_FOR_PLACEMENT})"
echo "[INFO] Required: ${REQUIRED_MEM_GI} Gi, ${REQUIRED_CPU} CPU; need >= ${MIN_NODES_FOR_PLACEMENT} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
echo " "
refresh_resource_state() {
IFS=$'\t' read -r available_mem_gi available_cpu nodes_meeting_min < <(gather_node_resources)
deficit_mem=$(echo "$REQUIRED_MEM_GI - $available_mem_gi" | bc 2>/dev/null || echo "$REQUIRED_MEM_GI")
deficit_cpu=$(echo "$REQUIRED_CPU - $available_cpu" | bc 2>/dev/null || echo "$REQUIRED_CPU")

deficit_mem=$(echo "$REQUIRED_MEM_GI - $available_mem_gi" | bc 2>/dev/null || echo "$REQUIRED_MEM_GI")
deficit_cpu=$(echo "$REQUIRED_CPU - $available_cpu" | bc 2>/dev/null || echo "$REQUIRED_CPU")
total_sufficient=false
if float_le "$deficit_mem" 0 && float_le "$deficit_cpu" 0; then
total_sufficient=true
fi

# Check for sufficient free resources and node availability to proceed with placement
total_sufficient=false
placement_sufficient=false
if float_le "$deficit_mem" 0 && float_le "$deficit_cpu" 0; then
total_sufficient=true
fi
placement_sufficient=false
if [[ $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
placement_sufficient=true
fi
}

if [[ $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
placement_sufficient=true
fi
refresh_resource_state
echo "[INFO] Workers: free ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min} (need at least ${MIN_NODES_FOR_PLACEMENT})"
echo "[INFO] Required: ${REQUIRED_MEM_GI} Gi, ${REQUIRED_CPU} CPU; need >= ${MIN_NODES_FOR_PLACEMENT} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
echo " "

if $total_sufficient && $placement_sufficient; then
echo "[INFO] Resources sufficient (total + placement), no VMs to power off"
Expand Down Expand Up @@ -256,7 +265,7 @@ jobs:
.items[]
| select(.metadata.namespace | test("^nightly-e2e-|static-cse") | not)
| select(.metadata.labels | tostring | test("e2e-cluster/do-not-stop-vm-on-e2e-run") | not)
| select(.spec.runPolicy != "AlwaysOff")
| select(.status.phase != "Stopped")
| [.metadata.namespace, .metadata.name, (.spec.memory.size // "0"), (.spec.cpu.cores // 0), (.spec.cpu.coreFraction // "100%")]
| @tsv
'
Expand All @@ -271,41 +280,120 @@ jobs:
done | sort -t$'\t' -k1,1 -rn
}

# Keep powering off while: placement not met, or memory/CPU deficit not yet covered
vm_cpu_from_cores_and_fraction() {
local cores="$1" core_frac="$2" frac_pct=100
[[ "$core_frac" =~ ^([0-9]+)%$ ]] && frac_pct="${BASH_REMATCH[1]}"
echo "scale=2; $cores * $frac_pct / 100" | bc
}

print_power_off_plan() {
  # Log the power-off candidates (read from TSV file $1, one VM per line:
  # mem_gi, ns, name, mem_qty, cores, coreFraction) in planned order,
  # together with the running total of VM-spec memory/CPU each stop is
  # projected to release.
  local candidates_file="$1"
  local idx=0
  local total_mem=0
  local total_cpu=0
  local mem_gi ns name mem_qty cores core_frac cpu_eff

  echo "[INFO] Planned power-off order with projected VM-spec resources:"
  echo "[INFO] Projection is based on VM spec memory/cpu; actual placement improvement depends on where workloads are running."

  while IFS=$'\t' read -r mem_gi ns name mem_qty cores core_frac; do
    # Skip blank/malformed lines (no namespace).
    [[ -n "$ns" ]] || continue
    idx=$((idx + 1))
    cpu_eff=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac")
    total_mem=$(echo "$total_mem + $mem_gi" | bc)
    total_cpu=$(echo "$total_cpu + $cpu_eff" | bc)
    echo "[PLAN] ${idx}. ${ns}/${name} -> ${mem_gi} Gi RAM, ${cpu_eff} CPU (cumulative: ${total_mem} Gi RAM, ${total_cpu} CPU)"
  done < "$candidates_file"

  if [[ $idx -eq 0 ]]; then
    echo "[WARN] No VM candidates available for power off"
  fi
}

count_stopped_requested_vms() {
  # $1: TSV file listing "namespace<TAB>name" of VMs we requested to stop.
  # Queries each VM's current .status.phase and prints
  # "<stopped_count>\t<total_count>" on a single line.
  local vm_list_file="$1"
  local stopped=0
  local total=0
  local vm_ns vm_name vm_phase

  while IFS=$'\t' read -r vm_ns vm_name; do
    # Ignore blank/malformed lines.
    [[ -n "$vm_ns" ]] || continue
    total=$((total + 1))
    # Treat lookup failures as not-yet-stopped ("Unknown").
    vm_phase=$(kubectl get vm -n "$vm_ns" "$vm_name" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
    if [[ "$vm_phase" == "Stopped" ]]; then
      stopped=$((stopped + 1))
    fi
  done < "$vm_list_file"

  printf '%s\t%s\n' "$stopped" "$total"
}

# Keep powering off while current cluster state still does not satisfy placement or total resources.
still_need_to_free() {
  # Exit status 0 = more VMs must be powered off; 1 = cluster state is OK.
  # Reads globals: placement_sufficient, total_sufficient, deficit_mem,
  # deficit_cpu (presumably maintained by refresh_resource_state) and
  # freed_mem/freed_cpu (accumulated elsewhere in the script — TODO confirm).
  $placement_sufficient || return 0
  if float_gt "$deficit_mem" 0 && float_lt "$freed_mem" "$deficit_mem"; then
    return 0
  fi
  if float_gt "$deficit_cpu" 0 && float_lt "$freed_cpu" "$deficit_cpu"; then
    return 0
  fi
  $total_sufficient || return 0
  return 1
}

freed_mem=0
freed_cpu=0
vm_candidates_file=$(mktemp)
requested_vms_file=$(mktemp)
trap 'rm -f "$vm_candidates_file" "$requested_vms_file"' EXIT
get_vms_candidates | sort_by_mem_desc > "$vm_candidates_file"
print_power_off_plan "$vm_candidates_file"

requested_count=0
while IFS=$'\t' read -r vm_mem_gi ns name mem_qty cores core_frac; do
[[ -n "$ns" ]] || continue
still_need_to_free || break
vm_cpu=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac")

frac_pct=100
[[ "$core_frac" =~ ^([0-9]+)%$ ]] && frac_pct="${BASH_REMATCH[1]}"
vm_cpu=$(echo "scale=2; $cores * $frac_pct / 100" | bc)

echo "[INFO] Powering off vm $ns/$name (${vm_mem_gi} Gi, ${vm_cpu} CPU)"
kubectl patch vm -n "$ns" "$name" --type=merge -p '{"spec":{"runPolicy":"AlwaysOff"}}' || true
freed_mem=$(echo "$freed_mem + $vm_mem_gi" | bc)
freed_cpu=$(echo "$freed_cpu + $vm_cpu" | bc)

# Re-check placement after each power-off (pods may take a few seconds to disappear)
if ! $placement_sufficient; then
kubectl wait --for=jsonpath='{.status.phase}'=Stopped vm -n "$ns" "$name" --timeout=90s || true
gather_node_resources
if [[ $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
placement_sufficient=true
echo "[INFO] Placement now sufficient: ${nodes_meeting_min} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
fi
echo "[INFO] Request power off for vm $ns/$name (${vm_mem_gi} Gi, ${vm_cpu} CPU)"
if ! kubectl patch vm -n "$ns" "$name" --type=merge -p '{"spec":{"runPolicy":"AlwaysOff"}}'; then
echo "[WARN] Failed to power off vm $ns/$name, skip it and continue with next candidate"
continue
fi
printf '%s\t%s\n' "$ns" "$name" >> "$requested_vms_file"
requested_count=$((requested_count + 1))
done < "$vm_candidates_file"

if [[ $requested_count -eq 0 ]]; then
echo "[ERROR] No running VM candidates available for power off, but resources are still insufficient."
echo "[ERROR] Human intervention is required."
rm -f "$vm_candidates_file" "$requested_vms_file"
trap - EXIT
exit 1
fi

echo "[INFO] Requested power off for ${requested_count} VM(s). Waiting up to ${POWER_OFF_WAIT_TIMEOUT_SEC}s and checking cluster resources every ${POWER_OFF_POLL_INTERVAL_SEC}s."

wait_elapsed=0
prev_nodes_meeting_min="$nodes_meeting_min"
while true; do
refresh_resource_state
IFS=$'\t' read -r stopped_requested total_requested < <(count_stopped_requested_vms "$requested_vms_file")
echo "[INFO] Current workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}"
echo "[INFO] Requested VMs stopped: ${stopped_requested}/${total_requested}; waited ${wait_elapsed}s/${POWER_OFF_WAIT_TIMEOUT_SEC}s"
if [[ $prev_nodes_meeting_min -lt $MIN_NODES_FOR_PLACEMENT && $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
echo "[INFO] Placement now sufficient: ${nodes_meeting_min} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
fi
prev_nodes_meeting_min="$nodes_meeting_min"

if ! still_need_to_free; then
break
fi

if [[ $total_requested -gt 0 && $stopped_requested -eq $total_requested ]]; then
echo "[INFO] All requested VMs are already stopped; no need to wait further."
break
fi
done < <(get_vms_candidates | sort_by_mem_desc)

echo "[INFO] Freed: ${freed_mem} Gi RAM, ${freed_cpu} CPU"
if [[ $wait_elapsed -ge $POWER_OFF_WAIT_TIMEOUT_SEC ]]; then
break
fi

sleep "$POWER_OFF_POLL_INTERVAL_SEC"
wait_elapsed=$((wait_elapsed + POWER_OFF_POLL_INTERVAL_SEC))
done

rm -f "$vm_candidates_file" "$requested_vms_file"
trap - EXIT

echo "[INFO] Final workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}"

if still_need_to_free; then
echo "[ERROR] Stopping VMs did not free enough resources. Human intervention is required."
Expand Down
Loading