Skip to content

Commit 24ca662

Browse files
committed
Fix race condition causing sshd start failure during provisioning
* Run first-boot tasks via systemd so sshd never races with host-key regeneration. The old `rc.local` script ran after network.target but in parallel with other regular system services, such as ssh.service. As a result, ssh.service often started (and restarted) while `/root/firstboot.sh` was deleting the host keys. cloud-init’s set-passwords module made this worse by restarting ssh mid-run.
* Replace `rc.local` with a oneshot firstboot.service (delete keys, create new keys, reconfigure sysstat) that runs Before=ssh.service and leaves the `/root/firstboot_done` file as a marker.
* Add a cloud-config.service drop-in so cloud-init's config stage waits for firstboot.service.
* Update walinuxagent.service to wait for firstboot.service, ensuring ssh keys have been regenerated.

This guarantees sshd, cloud-init, and WALinuxAgent all start only after the first-boot tasks have completed.
1 parent 4ccb5ce commit 24ca662

File tree

7 files changed

+30
-24
lines changed

7 files changed

+30
-24
lines changed

stemcell_builder/stages/base_ubuntu_firstboot/apply.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ set -e
55
base_dir=$(readlink -nf $(dirname $0)/../..)
66
source $base_dir/lib/prelude_apply.bash
77

8-
cp $assets_dir/etc/rc.local $chroot/etc/rc.local
9-
cp $assets_dir/root/firstboot.sh $chroot/root/firstboot.sh
10-
chmod u+x "${chroot}/etc/rc.local"
11-
chmod 0755 $chroot/root/firstboot.sh
8+
install -D -m 0644 \
9+
$assets_dir/etc/systemd/system/firstboot.service \
10+
$chroot/etc/systemd/system/firstboot.service
11+
12+
run_in_chroot $chroot "systemctl enable firstboot.service"

stemcell_builder/stages/base_ubuntu_firstboot/assets/etc/rc.local

Lines changed: 0 additions & 9 deletions
This file was deleted.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[Unit]
2+
Description=Run first boot tasks
3+
ConditionPathExists=!/root/firstboot_done
4+
Before=ssh.service
5+
6+
[Service]
7+
Type=oneshot
8+
ExecStartPre=/bin/sh -c '/bin/rm -f /etc/ssh/ssh_host*key*'
9+
ExecStart=/usr/bin/ssh-keygen -A -v
10+
ExecStartPost=/usr/sbin/dpkg-reconfigure -fnoninteractive sysstat
11+
ExecStartPost=/usr/bin/touch /root/firstboot_done
12+
RemainAfterExit=yes
13+
14+
[Install]
15+
WantedBy=multi-user.target

stemcell_builder/stages/base_ubuntu_firstboot/assets/root/firstboot.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

stemcell_builder/stages/system_azure_init/apply.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,16 @@ cat > $chroot/etc/logrotate.d/waagent <<EOS
4848
}
4949
EOS
5050

51-
#setup cloud-init
51+
# Setup cloud-init
5252
rm $chroot/etc/cloud/*.cfg
5353
rm $chroot/etc/cloud/cloud.cfg.d/*.cfg
5454
cp -f $dir/assets/etc/cloud-init/cloud.cfg $chroot/etc/cloud/cloud.cfg
5555
cp -f $dir/assets/etc/cloud-init/*-*.cfg $chroot/etc/cloud/cloud.cfg.d/
5656

57+
# Ensures that cloud-init waits until host keys have been regenerated.
58+
mkdir -p $chroot/etc/systemd/system/cloud-config.service.d
59+
cp -f $dir/assets/etc/systemd/system/cloud-config.service.d/firstboot-blocker.conf \
60+
$chroot/etc/systemd/system/cloud-config.service.d/firstboot-blocker.conf
5761

5862
# this will append the following two relevant lines (plus a few commented out lines)
5963
# to the default-conf:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[Unit]
2+
Wants=firstboot.service
3+
After=firstboot.service

stemcell_builder/stages/system_azure_init/assets/etc/waagent/walinuxagent.service

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,15 @@
77
[Unit]
88
Description=Azure Linux Agent
99

10-
After=network-online.target cloud-init.service
10+
# NON-DEFAULT: Must run after the firstboot.service, which regenerates the ssh keys
11+
After=firstboot.service network-online.target cloud-init.service
1112
Wants=network-online.target sshd.service sshd-keygen.service
1213

1314
ConditionFileIsExecutable=/usr/sbin/waagent
1415
ConditionPathExists=/etc/waagent.conf
1516

1617
[Service]
1718
Type=simple
18-
# stemcells on Azure re-generate the SSH Hostkey upon first reboot
19-
# waagent has to wait until the file was recreated
20-
ExecStartPre=/bin/bash -c "while [ ! -f /root/firstboot_done ]; do sleep 1; done"
2119
ExecStart=/usr/bin/python3 -u /usr/sbin/waagent -daemon
2220
Restart=always
2321
Slice=azure.slice

0 commit comments

Comments
 (0)