From 67601c4da7d2ee6681e428dc722141202e276d28 Mon Sep 17 00:00:00 2001 From: Arjun Raja Yogidas Date: Sat, 4 Oct 2025 13:43:52 +0000 Subject: [PATCH 1/3] Update systemd logic, cleanup redundancy Signed-off-by: Arjun Raja Yogidas --- Dockerfile | 2 +- .../container_health_check_linux_test.go | 25 +- cmd/nerdctl/container/container_run.go | 5 +- hack/test-integration.sh | 4 +- pkg/containerutil/containerutil.go | 10 +- pkg/healthcheck/healthcheck_manager_darwin.go | 5 + .../healthcheck_manager_freebsd.go | 5 + pkg/healthcheck/healthcheck_manager_linux.go | 388 ++++++++++++------ .../healthcheck_manager_windows.go | 5 + 9 files changed, 306 insertions(+), 143 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4443b3ffba6..51f977a30bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -286,7 +286,7 @@ RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \ bash-completion \ ca-certificates curl \ iproute2 iptables \ - dbus dbus-user-session systemd systemd-sysv \ + dbus dbus-user-session dbus-x11 systemd systemd-sysv \ fuse3 COPY --from=build-full /docker-entrypoint.sh /docker-entrypoint.sh COPY --from=out-full / /usr/local/ diff --git a/cmd/nerdctl/container/container_health_check_linux_test.go b/cmd/nerdctl/container/container_health_check_linux_test.go index 9ce502f1523..fdba33f8288 100644 --- a/cmd/nerdctl/container/container_health_check_linux_test.go +++ b/cmd/nerdctl/container/container_health_check_linux_test.go @@ -32,7 +32,6 @@ import ( "github.com/containerd/nerdctl/mod/tigron/tig" "github.com/containerd/nerdctl/v2/pkg/healthcheck" - "github.com/containerd/nerdctl/v2/pkg/rootlessutil" "github.com/containerd/nerdctl/v2/pkg/testutil" "github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest" ) @@ -44,9 +43,9 @@ func TestContainerHealthCheckBasic(t *testing.T) { testCase.Require = require.Not(nerdtest.Docker) // Skip systemd tests in rootless environment to bypass dbus permission issues - if rootlessutil.IsRootless() { - t.Skip("systemd healthcheck tests are skipped in rootless environment") - } + // if rootlessutil.IsRootless() { + // t.Skip("systemd healthcheck tests are skipped in rootless environment") + // } testCase.SubTests = []*test.Case{ { @@ -146,9 +145,9 @@ func TestContainerHealthCheckAdvance(t *testing.T) { testCase.Require = require.Not(nerdtest.Docker) // Skip systemd tests in rootless environment to bypass dbus permission issues - if rootlessutil.IsRootless() { - t.Skip("systemd healthcheck tests are skipped in rootless environment") - } + // if rootlessutil.IsRootless() { + // t.Skip("systemd healthcheck tests are skipped in rootless environment") + // } testCase.SubTests = []*test.Case{ { @@ -618,9 +617,9 @@ func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) { testCase := nerdtest.Setup() testCase.Require = require.Not(nerdtest.Docker) // Skip systemd tests in rootless environment to bypass dbus permission issues - if rootlessutil.IsRootless() { - t.Skip("systemd healthcheck tests are skipped in rootless environment") - } + // if rootlessutil.IsRootless() { + // t.Skip("systemd healthcheck tests are skipped in rootless environment") + // } testCase.SubTests = []*test.Case{ { @@ -802,9 +801,9 @@ func TestHealthCheck_SystemdIntegration_Advanced(t *testing.T) { testCase := nerdtest.Setup() testCase.Require = require.Not(nerdtest.Docker) // Skip systemd tests in rootless environment to bypass dbus permission issues - if rootlessutil.IsRootless() { - t.Skip("systemd healthcheck tests are skipped in rootless environment") - } + // if rootlessutil.IsRootless() { + // t.Skip("systemd healthcheck tests are skipped in rootless environment") + // } testCase.SubTests = []*test.Case{ { diff --git a/cmd/nerdctl/container/container_run.go b/cmd/nerdctl/container/container_run.go index 9b44feb19c8..e20f97c97ca 100644 --- a/cmd/nerdctl/container/container_run.go +++ b/cmd/nerdctl/container/container_run.go @@ -447,12 +447,9 @@ func runAction(cmd *cobra.Command, args []string) error { } // Setup container healthchecks. - if err := healthcheck.CreateTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { + if err := healthcheck.CreateAndStartTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } - if err := healthcheck.StartTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { - return fmt.Errorf("failed to start healthcheck timer: %w", err) - } if createOpt.Detach { fmt.Fprintln(createOpt.Stdout, id) diff --git a/hack/test-integration.sh b/hack/test-integration.sh index cdbeb61957f..13671b34917 100755 --- a/hack/test-integration.sh +++ b/hack/test-integration.sh @@ -50,7 +50,7 @@ for arg in "$@"; do done if [ "$needsudo" == "true" ] || [ "$needsudo" == "yes" ] || [ "$needsudo" == "1" ]; then - gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -exec sudo -args -test.allow-kill-daemon "$@" + gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -exec sudo -v -run TestHealthCheck_SystemdIntegration_Advanced -args -test.allow-kill-daemon ./cmd/nerdctl/container/ else - gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" + gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -v -run TestHealthCheck_SystemdIntegration_Advanced -args -test.allow-kill-daemon ./cmd/nerdctl/container/ fi diff --git a/pkg/containerutil/containerutil.go b/pkg/containerutil/containerutil.go index 7805ad10b92..9e13994dfa6 100644 --- a/pkg/containerutil/containerutil.go +++ b/pkg/containerutil/containerutil.go @@ -293,12 +293,9 @@ func Start(ctx context.Context, container containerd.Container, isAttach bool, i } // If container has health checks configured, create and start systemd timer/service files. - if err := healthcheck.CreateTimer(ctx, container, cfg); err != nil { + if err := healthcheck.CreateAndStartTimer(ctx, container, cfg); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } - if err := healthcheck.StartTimer(ctx, container, cfg); err != nil { - return fmt.Errorf("failed to start healthcheck timer: %w", err) - } if !isAttach { return nil @@ -532,12 +529,9 @@ func Unpause(ctx context.Context, client *containerd.Client, id string, cfg *con } // Recreate healthcheck related systemd timer/service files. - if err := healthcheck.CreateTimer(ctx, container, cfg); err != nil { + if err := healthcheck.CreateAndStartTimer(ctx, container, cfg); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } - if err := healthcheck.StartTimer(ctx, container, cfg); err != nil { - return fmt.Errorf("failed to start healthcheck timer: %w", err) - } switch status.Status { case containerd.Paused: diff --git a/pkg/healthcheck/healthcheck_manager_darwin.go b/pkg/healthcheck/healthcheck_manager_darwin.go index b708b574281..89768d62673 100644 --- a/pkg/healthcheck/healthcheck_manager_darwin.go +++ b/pkg/healthcheck/healthcheck_manager_darwin.go @@ -29,6 +29,11 @@ func CreateTimer(ctx context.Context, container containerd.Container, cfg *confi return nil } +// CreateAndStartTimer sets up the transient systemd timer and service for healthchecks. +func CreateAndStartTimer(ctx context.Context, container containerd.Container, cfg *config.Config) error { + return nil +} + // StartTimer starts the healthcheck timer unit. func StartTimer(ctx context.Context, container containerd.Container, cfg *config.Config) error { return nil diff --git a/pkg/healthcheck/healthcheck_manager_freebsd.go b/pkg/healthcheck/healthcheck_manager_freebsd.go index b708b574281..030348549e4 100644 --- a/pkg/healthcheck/healthcheck_manager_freebsd.go +++ b/pkg/healthcheck/healthcheck_manager_freebsd.go @@ -34,6 +34,11 @@ func StartTimer(ctx context.Context, container containerd.Container, cfg *config return nil } +// CreateAndStartTimer sets up the transient systemd timer and service for healthchecks. +func CreateAndStartTimer(ctx context.Context, container containerd.Container, cfg *config.Config) error { + return nil +} + // RemoveTransientHealthCheckFiles stops and cleans up the transient timer and service. func RemoveTransientHealthCheckFiles(ctx context.Context, container containerd.Container) error { return nil diff --git a/pkg/healthcheck/healthcheck_manager_linux.go b/pkg/healthcheck/healthcheck_manager_linux.go index e043b5c2d37..ac44d40ea95 100644 --- a/pkg/healthcheck/healthcheck_manager_linux.go +++ b/pkg/healthcheck/healthcheck_manager_linux.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "strings" "time" @@ -117,120 +118,120 @@ func RemoveTransientHealthCheckFiles(ctx context.Context, container containerd.C // ForceRemoveTransientHealthCheckFiles forcefully stops and cleans up the transient timer and service // using just the container ID. This function is non-blocking and uses timeouts to prevent hanging // on systemd operations. It logs errors as warnings but continues cleanup attempts. -func ForceRemoveTransientHealthCheckFiles(ctx context.Context, containerID string) error { - log.G(ctx).Debugf("Force removing healthcheck timer unit: %s", containerID) - - // Create a timeout context for systemd operations - timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Second) - defer cancel() - - timer := containerID + ".timer" - service := containerID + ".service" - - // Channel to collect any critical errors (though we'll continue cleanup regardless) - errChan := make(chan error, 3) - - // Goroutine for DBUS connection and cleanup operations - go func() { - defer close(errChan) - - var conn *dbus.Conn - var err error - if rootlessutil.IsRootless() { - conn, err = dbus.NewUserConnectionContext(ctx) - } else { - conn, err = dbus.NewSystemConnectionContext(ctx) - } - if err != nil { - log.G(ctx).Warnf("systemd DBUS connect error during force cleanup: %v", err) - errChan <- fmt.Errorf("systemd DBUS connect error: %w", err) - return - } - defer conn.Close() - - // Stop timer with timeout - go func() { - select { - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("timeout stopping timer %s during force cleanup", timer) - return - default: - tChan := make(chan string, 1) - if _, err := conn.StopUnitContext(timeoutCtx, timer, "ignore-dependencies", tChan); err == nil { - select { - case msg := <-tChan: - if msg != "done" { - log.G(ctx).Warnf("timer stop message during force cleanup: %s", msg) - } - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("timeout waiting for timer stop confirmation: %s", timer) - } - } else { - log.G(ctx).Warnf("failed to stop timer %s during force cleanup: %v", timer, err) - } - } - }() - - // Stop service with timeout - go func() { - select { - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("timeout stopping service %s during force cleanup", service) - return - default: - sChan := make(chan string, 1) - if _, err := conn.StopUnitContext(timeoutCtx, service, "ignore-dependencies", sChan); err == nil { - select { - case msg := <-sChan: - if msg != "done" { - log.G(ctx).Warnf("service stop message during force cleanup: %s", msg) - } - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("timeout waiting for service stop confirmation: %s", service) - } - } else { - log.G(ctx).Warnf("failed to stop service %s during force cleanup: %v", service, err) - } - } - }() - - // Reset failed units (best effort, non-blocking) - go func() { - select { - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("timeout resetting failed unit %s during force cleanup", service) - return - default: - if err := conn.ResetFailedUnitContext(timeoutCtx, service); err != nil { - log.G(ctx).Warnf("failed to reset failed unit %s during force cleanup: %v", service, err) - } - } - }() - - // Wait a short time for operations to complete, but don't block indefinitely - select { - case <-time.After(3 * time.Second): - log.G(ctx).Debugf("force cleanup operations completed for container %s", containerID) - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("force cleanup timed out for container %s", containerID) - } - }() - - // Wait for the cleanup goroutine to finish or timeout - select { - case err := <-errChan: - if err != nil { - log.G(ctx).Warnf("force cleanup encountered errors but continuing: %v", err) - } - case <-timeoutCtx.Done(): - log.G(ctx).Warnf("force cleanup timed out for container %s, but cleanup may continue in background", containerID) - } - - // Always return nil - this function should never block the caller - // even if systemd operations fail or timeout - log.G(ctx).Debugf("force cleanup completed (non-blocking) for container %s", containerID) - return nil -} +// func ForceRemoveTransientHealthCheckFiles(ctx context.Context, containerID string) error { +// log.G(ctx).Debugf("Force removing healthcheck timer unit: %s", containerID) + +// // Create a timeout context for systemd operations +// timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Second) +// defer cancel() + +// timer := containerID + ".timer" +// service := containerID + ".service" + +// // Channel to collect any critical errors (though we'll continue cleanup regardless) +// errChan := make(chan error, 3) + +// // Goroutine for DBUS connection and cleanup operations +// go func() { +// defer close(errChan) + +// var conn *dbus.Conn +// var err error +// if rootlessutil.IsRootless() { +// conn, err = dbus.NewUserConnectionContext(ctx) +// } else { +// conn, err = dbus.NewSystemConnectionContext(ctx) +// } +// if err != nil { +// log.G(ctx).Warnf("systemd DBUS connect error during force cleanup: %v", err) +// errChan <- fmt.Errorf("systemd DBUS connect error: %w", err) +// return +// } +// defer conn.Close() + +// // Stop timer with timeout +// go func() { +// select { +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("timeout stopping timer %s during force cleanup", timer) +// return +// default: +// tChan := make(chan string, 1) +// if _, err := conn.StopUnitContext(timeoutCtx, timer, "ignore-dependencies", tChan); err == nil { +// select { +// case msg := <-tChan: +// if msg != "done" { +// log.G(ctx).Warnf("timer stop message during force cleanup: %s", msg) +// } +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("timeout waiting for timer stop confirmation: %s", timer) +// } +// } else { +// log.G(ctx).Warnf("failed to stop timer %s during force cleanup: %v", timer, err) +// } +// } +// }() + +// // Stop service with timeout +// go func() { +// select { +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("timeout stopping service %s during force cleanup", service) +// return +// default: +// sChan := make(chan string, 1) +// if _, err := conn.StopUnitContext(timeoutCtx, service, "ignore-dependencies", sChan); err == nil { +// select { +// case msg := <-sChan: +// if msg != "done" { +// log.G(ctx).Warnf("service stop message during force cleanup: %s", msg) +// } +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("timeout waiting for service stop confirmation: %s", service) +// } +// } else { +// log.G(ctx).Warnf("failed to stop service %s during force cleanup: %v", service, err) +// } +// } +// }() + +// // Reset failed units (best effort, non-blocking) +// go func() { +// select { +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("timeout resetting failed unit %s during force cleanup", service) +// return +// default: +// if err := conn.ResetFailedUnitContext(timeoutCtx, service); err != nil { +// log.G(ctx).Warnf("failed to reset failed unit %s during force cleanup: %v", service, err) +// } +// } +// }() + +// // Wait a short time for operations to complete, but don't block indefinitely +// select { +// case <-time.After(3 * time.Second): +// log.G(ctx).Debugf("force cleanup operations completed for container %s", containerID) +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("force cleanup timed out for container %s", containerID) +// } +// }() + +// // Wait for the cleanup goroutine to finish or timeout +// select { +// case err := <-errChan: +// if err != nil { +// log.G(ctx).Warnf("force cleanup encountered errors but continuing: %v", err) +// } +// case <-timeoutCtx.Done(): +// log.G(ctx).Warnf("force cleanup timed out for container %s, but cleanup may continue in background", containerID) +// } + +// // Always return nil - this function should never block the caller +// // even if systemd operations fail or timeout +// log.G(ctx).Debugf("force cleanup completed (non-blocking) for container %s", containerID) +// return nil +// } func extractHealthcheck(ctx context.Context, container containerd.Container) *Healthcheck { l, err := container.Labels(ctx) @@ -253,7 +254,7 @@ func extractHealthcheck(ctx context.Context, container containerd.Container) *He // shouldSkipHealthCheckSystemd determines if healthcheck timers should be skipped. func shouldSkipHealthCheckSystemd(hc *Healthcheck, cfg *config.Config) bool { // Don't proceed if systemd is unavailable or disabled - if !defaults.IsSystemdAvailable() || cfg.DisableHCSystemd || rootlessutil.IsRootless() { + if !defaults.IsSystemdAvailable() || cfg.DisableHCSystemd { return true } @@ -263,3 +264,160 @@ func shouldSkipHealthCheckSystemd(hc *Healthcheck, cfg *config.Config) bool { } return false } + +func ForceRemoveTransientHealthCheckFiles(ctx context.Context, containerID string) error { + timer := containerID + ".timer" + service := containerID + ".service" + + // Use a short timeout to avoid hanging + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + // Connect to the right systemd instance + var conn *dbus.Conn + var err error + if rootlessutil.IsRootless() { + conn, err = dbus.NewUserConnectionContext(timeoutCtx) + } else { + conn, err = dbus.NewSystemConnectionContext(timeoutCtx) + } + if err != nil { + return fmt.Errorf("systemd DBUS connect error: %w", err) + } + defer conn.Close() + + // Stop the timer and service units (best effort) + stopAndWait := func(unit string) { + ch := make(chan string, 1) + _, err := conn.StopUnitContext(timeoutCtx, unit, "ignore-dependencies", ch) + if err == nil { + select { + case <-ch: + case <-timeoutCtx.Done(): + } + } + } + stopAndWait(timer) + stopAndWait(service) + + // Disable the timer unit (best effort) + _, _ = conn.DisableUnitFilesContext(timeoutCtx, []string{timer}, false) + + // Reset failed state (best effort) + _ = conn.ResetFailedUnitContext(timeoutCtx, service) + + // Remove unit files + var unitDir string + if rootlessutil.IsRootless() { + unitDir = filepath.Join(os.Getenv("HOME"), ".config/systemd/user") + } else { + unitDir = "/etc/systemd/system" + } + timerPath := filepath.Join(unitDir, timer) + servicePath := filepath.Join(unitDir, service) + _ = os.Remove(timerPath) + _ = os.Remove(servicePath) + + // Reload systemd to apply changes + _ = conn.ReloadContext(timeoutCtx) + + return nil +} + +func CreateAndStartTimer(ctx context.Context, container containerd.Container, cfg *config.Config) error { + hc := extractHealthcheck(ctx, container) + if hc == nil { + return nil + } + if shouldSkipHealthCheckSystemd(hc, cfg) { + return nil + } + + containerID := container.ID() + + // Generate service and timer unit content + serviceContent := generateServiceContent(containerID, ctx) + timerContent := generateTimerContent(containerID, hc.Interval) + + // Determine the unit path + var unitDir string + var conn *dbus.Conn + var err error + + if rootlessutil.IsRootless() { + unitDir = filepath.Join(os.Getenv("HOME"), ".config/systemd/user") + conn, err = dbus.NewUserConnectionContext(ctx) + } else { + unitDir = "/etc/systemd/system" + conn, err = dbus.NewSystemConnectionContext(ctx) + } + if err != nil { + return fmt.Errorf("systemd DBUS connect error: %w", err) + } + defer conn.Close() + + // Write unit files + if err := os.MkdirAll(unitDir, 0755); err != nil { + return err + } + servicePath := filepath.Join(unitDir, containerID+".service") + timerPath := filepath.Join(unitDir, containerID+".timer") + + if err := os.WriteFile(servicePath, []byte(serviceContent), 0644); err != nil { + return err + } + if err := os.WriteFile(timerPath, []byte(timerContent), 0644); err != nil { + return err + } + + // Reload systemd and enable/start timer + if err := conn.ReloadContext(ctx); err != nil { + return fmt.Errorf("systemd reload failed: %w", err) + } + _, _, err = conn.EnableUnitFilesContext(ctx, []string{containerID + ".timer"}, false, true) + if err != nil { + return fmt.Errorf("enable timer failed: %w", err) + } + _, err = conn.StartUnitContext(ctx, containerID+".timer", "replace", nil) + if err != nil { + return fmt.Errorf("start timer failed: %w", err) + } + + log.G(ctx).Debugf("Created and started healthcheck timer unit: %s", containerID) + return nil +} + +// generateServiceContent creates the systemd service unit content for healthcheck +func generateServiceContent(containerID string, ctx context.Context) string { + return fmt.Sprintf(`[Unit] +Description=Healthcheck for container %s + +[Service] +Type=oneshot +Environment=PATH=%s +ExecStart=%s +`, containerID, os.Getenv("PATH"), buildHealthcheckCommand(containerID, ctx)) +} + +// generateTimerContent creates the systemd timer unit content for healthcheck +func generateTimerContent(containerID string, interval time.Duration) string { + return fmt.Sprintf(`[Unit] +Description=Healthcheck timer for container %s + +[Timer] +OnUnitInactiveSec=%ds +AccuracySec=1s + +[Install] +WantedBy=timers.target +`, containerID, int(interval.Seconds())) +} + +// Helper to build the healthcheck exec command +func buildHealthcheckCommand(containerID string, ctx context.Context) string { + cmd := fmt.Sprintf("nerdctl container healthcheck %s", containerID) + if log.G(ctx).Logger.IsLevelEnabled(log.DebugLevel) { + cmd += " --debug" + } + return cmd +} diff --git a/pkg/healthcheck/healthcheck_manager_windows.go b/pkg/healthcheck/healthcheck_manager_windows.go index 1da386fe2bc..d632d7fca4a 100644 --- a/pkg/healthcheck/healthcheck_manager_windows.go +++ b/pkg/healthcheck/healthcheck_manager_windows.go @@ -34,6 +34,11 @@ func StartTimer(ctx context.Context, container containerd.Container, cfg *config return nil } +// CreateAndStartTimer sets up the transient systemd timer and service for healthchecks. +func CreateAndStartTimer(ctx context.Context, container containerd.Container, cfg *config.Config) error { + return nil +} + // RemoveTransientHealthCheckFiles stops and cleans up the transient timer and service. func RemoveTransientHealthCheckFiles(ctx context.Context, container containerd.Container) error { return nil From 003d7571063afffe96fd4b0e77c26369130baccc Mon Sep 17 00:00:00 2001 From: Arjun Raja Yogidas Date: Sat, 4 Oct 2025 15:21:42 +0000 Subject: [PATCH 2/3] Test updating systemd dependency Signed-off-by: Arjun Raja Yogidas --- Dockerfile | 2 ++ Dockerfile.d/test-integration-rootless.sh | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Dockerfile b/Dockerfile index 51f977a30bc..0079aa0d237 100644 --- a/Dockerfile +++ b/Dockerfile @@ -363,6 +363,8 @@ RUN apt-get update -qq && apt-get install -qq --no-install-recommends \ uidmap \ openssh-server \ openssh-client +# Enable D-Bus user session for systemd healthcheck timers in rootless mode +RUN systemctl --global enable dbus.socket dbus.service # TODO: update containerized-systemd to enable sshd by default, or allow `systemctl wants ssh` here RUN ssh-keygen -q -t rsa -f /root/.ssh/id_rsa -N '' && \ useradd -m -s /bin/bash rootless && \ diff --git a/Dockerfile.d/test-integration-rootless.sh b/Dockerfile.d/test-integration-rootless.sh index f6e243f32b5..ae229f32aaf 100755 --- a/Dockerfile.d/test-integration-rootless.sh +++ b/Dockerfile.d/test-integration-rootless.sh @@ -34,6 +34,10 @@ if [[ "$(id -u)" = "0" ]]; then systemctl start ssh exec ssh -o StrictHostKeyChecking=no rootless@localhost "$0" "$@" else + # Start D-Bus user session for systemd healthcheck timers + systemctl --user start dbus.socket dbus.service || true + export DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus" + containerd-rootless-setuptool.sh install if grep -q "options use-vc" /etc/resolv.conf; then containerd-rootless-setuptool.sh nsenter -- sh -euc 'echo "options use-vc" >>/etc/resolv.conf' From e01cf0dd56c5662461350dcab885a4a5cc9ed3d4 Mon Sep 17 00:00:00 2001 From: Arjun Raja Yogidas Date: Sat, 4 Oct 2025 15:39:49 +0000 Subject: [PATCH 3/3] Test with old impl Signed-off-by: Arjun Raja Yogidas --- Dockerfile.d/test-integration-rootless.sh | 16 ++++++++++++++-- cmd/nerdctl/container/container_run.go | 5 ++++- pkg/containerutil/containerutil.go | 7 +++++-- pkg/healthcheck/healthcheck_manager_linux.go | 19 +++++++++++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Dockerfile.d/test-integration-rootless.sh b/Dockerfile.d/test-integration-rootless.sh index ae229f32aaf..a2a1b14dbc4 100755 --- a/Dockerfile.d/test-integration-rootless.sh +++ b/Dockerfile.d/test-integration-rootless.sh @@ -34,10 +34,22 @@ if [[ "$(id -u)" = "0" ]]; then systemctl start ssh exec ssh -o StrictHostKeyChecking=no rootless@localhost "$0" "$@" else - # Start D-Bus user session for systemd healthcheck timers - systemctl --user start dbus.socket dbus.service || true + # Ensure XDG_RUNTIME_DIR is set and create it if needed + export XDG_RUNTIME_DIR="/run/user/$(id -u)" + mkdir -p "$XDG_RUNTIME_DIR" + chmod 700 "$XDG_RUNTIME_DIR" + + # Start systemd user session and D-Bus for healthcheck timers + systemctl --user daemon-reload || true + systemctl --user start dbus.socket || true + systemctl --user start dbus.service || true + + # Set D-Bus session address export DBUS_SESSION_BUS_ADDRESS="unix:path=$XDG_RUNTIME_DIR/bus" + # Wait a moment for D-Bus to be ready + sleep 1 + containerd-rootless-setuptool.sh install if grep -q "options use-vc" /etc/resolv.conf; then containerd-rootless-setuptool.sh nsenter -- sh -euc 'echo "options use-vc" >>/etc/resolv.conf' diff --git a/cmd/nerdctl/container/container_run.go b/cmd/nerdctl/container/container_run.go index e20f97c97ca..9b44feb19c8 100644 --- a/cmd/nerdctl/container/container_run.go +++ b/cmd/nerdctl/container/container_run.go @@ -447,9 +447,12 @@ func runAction(cmd *cobra.Command, args []string) error { } // Setup container healthchecks. - if err := healthcheck.CreateAndStartTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { + if err := healthcheck.CreateTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } + if err := healthcheck.StartTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { + return fmt.Errorf("failed to start healthcheck timer: %w", err) + } if createOpt.Detach { fmt.Fprintln(createOpt.Stdout, id) diff --git a/pkg/containerutil/containerutil.go b/pkg/containerutil/containerutil.go index 9e13994dfa6..fe0d2ef4533 100644 --- a/pkg/containerutil/containerutil.go +++ b/pkg/containerutil/containerutil.go @@ -293,9 +293,12 @@ func Start(ctx context.Context, container containerd.Container, isAttach bool, i } // If container has health checks configured, create and start systemd timer/service files. - if err := healthcheck.CreateAndStartTimer(ctx, container, cfg); err != nil { + if err := healthcheck.CreateTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } + if err := healthcheck.StartTimer(ctx, c, (*config.Config)(&createOpt.GOptions)); err != nil { + return fmt.Errorf("failed to start healthcheck timer: %w", err) + } if !isAttach { return nil @@ -529,7 +532,7 @@ func Unpause(ctx context.Context, client *containerd.Client, id string, cfg *con } // Recreate healthcheck related systemd timer/service files. - if err := healthcheck.CreateAndStartTimer(ctx, container, cfg); err != nil { + if err := healthcheck.CreateTimer(ctx, container, cfg); err != nil { return fmt.Errorf("failed to create healthcheck timer: %w", err) } diff --git a/pkg/healthcheck/healthcheck_manager_linux.go b/pkg/healthcheck/healthcheck_manager_linux.go index ac44d40ea95..b39bfc8a8b5 100644 --- a/pkg/healthcheck/healthcheck_manager_linux.go +++ b/pkg/healthcheck/healthcheck_manager_linux.go @@ -63,10 +63,18 @@ func CreateTimer(ctx context.Context, container containerd.Container, cfg *confi } log.G(ctx).Debugf("creating healthcheck timer with: systemd-run %s", strings.Join(cmdOpts, " ")) + + // Add user flag for rootless mode + if rootlessutil.IsRootless() { + cmdOpts = append([]string{"--user"}, cmdOpts...) + } + run := exec.Command("systemd-run", cmdOpts...) if out, err := run.CombinedOutput(); err != nil { + log.G(ctx).Errorf("systemd-run failed for container %s: %v\noutput: %s", containerID, err, strings.TrimSpace(string(out))) return fmt.Errorf("systemd-run failed: %w\noutput: %s", err, strings.TrimSpace(string(out))) } + log.G(ctx).Debugf("Successfully created healthcheck timer for container %s", containerID) return nil } @@ -371,7 +379,18 @@ func CreateAndStartTimer(ctx context.Context, container containerd.Container, cf } // Reload systemd and enable/start timer + log.G(ctx).Debugf("Attempting systemd reload for container %s", containerID) if err := conn.ReloadContext(ctx); err != nil { + // Log additional debugging information + log.G(ctx).Errorf("systemd reload failed for container %s: %v", containerID, err) + + // Try to get more information about the systemd state + if rootlessutil.IsRootless() { + log.G(ctx).Debugf("Running in rootless mode, checking user systemd status") + } else { + log.G(ctx).Debugf("Running in rootful mode, checking system systemd status") + } + return fmt.Errorf("systemd reload failed: %w", err) } _, _, err = conn.EnableUnitFilesContext(ctx, []string{containerID + ".timer"}, false, true)