Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 57 additions & 42 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -5716,54 +5716,61 @@ public void run(FlowTrigger trigger, Map data) {

@Override
public boolean skip(Map data) {
// ZSTAC-84446: run detection whenever TLS is enabled so check
// and first-deploy share the same IP source.
return CoreGlobalProperty.UNIT_TEST_ON
|| !KVMGlobalConfig.LIBVIRT_TLS_ENABLED.value(Boolean.class)
|| !rcf.getResourceConfigValue(
KVMGlobalConfig.RECONNECT_HOST_RESTART_LIBVIRTD_SERVICE,
self.getUuid(), Boolean.class);
|| !KVMGlobalConfig.LIBVIRT_TLS_ENABLED.value(Boolean.class);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

@Override
public void run(FlowTrigger trigger, Map data) {
String managementIp = getSelf().getManagementIp();

SshShell sshShell = new SshShell();
sshShell.setHostname(managementIp);
sshShell.setUsername(getSelf().getUsername());
sshShell.setPassword(getSelf().getPassword());
sshShell.setPort(getSelf().getPort());

// Use the same logic as zstack-utility host_plugin.fact() so MN's
// expectation matches what the host itself reports.
String certIpList = KVMHostUtils.collectHostIps(
sshShell, self.getUuid(), managementIp);
List<String> allIps = new ArrayList<>(Arrays.asList(certIpList.split(",")));
// Save detected IPs so apply-ansible-playbook can union with EXTRA_IPS
// without running a second SSH.
data.put("TLS_DETECTED_IPS", certIpList);

SshResult sanResult = sshShell.runCommand(
"openssl x509 -in /etc/pki/libvirt/servercert.pem -noout -ext subjectAltName 2>/dev/null");

boolean needDeploy = false;
if (sanResult.isSshFailure() || sanResult.getReturnCode() != 0
|| sanResult.getStdout() == null || sanResult.getStdout().trim().isEmpty()) {
// cert doesn't exist or can't be read
logger.info(String.format("TLS cert not found or unreadable on host[uuid:%s], need deploy", self.getUuid()));
needDeploy = true;
} else {
Set<String> sanIps = parseSanIps(sanResult.getStdout());
for (String ip : allIps) {
if (!sanIps.contains(ip)) {
logger.info(String.format("TLS cert SAN missing IP %s on host[uuid:%s], need deploy", ip, self.getUuid()));
needDeploy = true;
break;
// ZSTAC-84446: detection is best-effort. SSH failures must NOT
// break reconnect; on error we skip and let the deploy step
// fall back to mgmtIp + EXTRA_IPS.
try {
String managementIp = getSelf().getManagementIp();

SshShell sshShell = new SshShell();
sshShell.setHostname(managementIp);
sshShell.setUsername(getSelf().getUsername());
sshShell.setPassword(getSelf().getPassword());
sshShell.setPort(getSelf().getPort());

// Same logic as zstack-utility host_plugin.fact() so MN's
// expectation matches what the host itself reports.
String certIpList = KVMHostUtils.collectHostIps(
sshShell, self.getUuid(), managementIp);
List<String> allIps = new ArrayList<>(Arrays.asList(certIpList.split(",")));
// Save detected IPs so apply-ansible-playbook can union with
// EXTRA_IPS without running a second SSH.
data.put("TLS_DETECTED_IPS", certIpList);

SshResult sanResult = sshShell.runCommand(
"openssl x509 -in /etc/pki/libvirt/servercert.pem -noout -ext subjectAltName 2>/dev/null");

boolean needDeploy = false;
if (sanResult.isSshFailure() || sanResult.getReturnCode() != 0
|| sanResult.getStdout() == null || sanResult.getStdout().trim().isEmpty()) {
logger.info(String.format("TLS cert not found or unreadable on host[uuid:%s], need deploy", self.getUuid()));
needDeploy = true;
} else {
Set<String> sanIps = parseSanIps(sanResult.getStdout());
for (String ip : allIps) {
if (!sanIps.contains(ip)) {
Comment on lines +5741 to +5759
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

先规范化 certIpList,否则这里会误判需要重部署。

Line 5743 直接 split(",") 后没有 trim/过滤空值。只要 collectHostIps(...) 的返回里带空格、尾逗号,或者是空串,后面的 SAN 对比就可能把 NEED_DEPLOY_TLS_CERT 错误置为 true

🛠️ 建议修改
-                            List<String> allIps = new ArrayList<>(Arrays.asList(certIpList.split(",")));
+                            List<String> allIps = Arrays.stream(certIpList.split(","))
+                                    .map(String::trim)
+                                    .filter(StringUtils::isNotBlank)
+                                    .collect(Collectors.toList());
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java` around lines 5741 -
5759, The certIpList returned by KVMHostUtils.collectHostIps is used raw
(split(",")) which can produce empty or whitespace entries and cause false
positives in the SAN comparison; normalize and filter it before use: split
certIpList, trim each entry, remove empty strings, rebuild certIpList (or a
canonicalList) and set data.put("TLS_DETECTED_IPS", canonicalList) and use that
canonicalList to populate allIps; keep using parseSanIps(...) and the existing
needDeploy logic but compare against the cleaned allIps to avoid unnecessary
redeploys.

logger.info(String.format("TLS cert SAN missing IP %s on host[uuid:%s], need deploy", ip, self.getUuid()));
needDeploy = true;
break;
}
}
}
}

if (needDeploy) {
data.put("NEED_DEPLOY_TLS_CERT", true);
if (needDeploy) {
data.put("NEED_DEPLOY_TLS_CERT", true);
}
} catch (Exception e) {
logger.warn(String.format(
"TLS cert detection failed on host[uuid:%s], continue connect flow: %s",
self.getUuid(), e.getMessage()), e);
}

trigger.next();
Expand Down Expand Up @@ -5910,11 +5917,19 @@ public void run(final FlowTrigger trigger, Map data) {
self.getUuid(), managementIp, detectedIps);
deployArguments.setTlsCertIps(tlsCertIps);

// Force ansible deploy when TLS cert needs update (detected by check-tls-certs flow)
// ZSTAC-84446: force ansible re-run only when policy allows;
// see KVMHostUtils#shouldForceTlsRedeploy.
Boolean needDeployTlsCert = (Boolean) data.get("NEED_DEPLOY_TLS_CERT");
if (Boolean.TRUE.equals(needDeployTlsCert)) {
boolean allowRestart = rcf.getResourceConfigValue(
KVMGlobalConfig.RECONNECT_HOST_RESTART_LIBVIRTD_SERVICE,
self.getUuid(), Boolean.class);
if (KVMHostUtils.shouldForceTlsRedeploy(
Boolean.TRUE.equals(needDeployTlsCert), allowRestart, info.isNewAdded())) {
runner.setForceRun(true);
deployArguments.setRestartLibvirtd("true");
} else if (Boolean.TRUE.equals(needDeployTlsCert)) {
logger.info(String.format("TLS cert needs deploy on host[uuid:%s], skip " +
"force-run to keep kvmagent PID stable", self.getUuid()));
}

if (deployArguments.isForceRun()) {
Expand Down
15 changes: 15 additions & 0 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHostUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,21 @@ public static String collectHostIps(String hostUuid, String managementIp,
return collectHostIps(newSsh(managementIp, username, password, sshPort), hostUuid, managementIp);
}

/**
* ZSTAC-84446: force ansible re-run + libvirtd restart only when operator
* opted in (RECONNECT_HOST_RESTART_LIBVIRTD_SERVICE) or it's a fresh add
* (full-deploy will start libvirtd anyway). Skipping on plain reconnect
* keeps kvmagent PID stable.
*/
public static boolean shouldForceTlsRedeploy(boolean needDeployTlsCert,
boolean allowRestartLibvirtd,
boolean isNewAdded) {
if (!needDeployTlsCert) {
return false;
}
return allowRestartLibvirtd || isNewAdded;
}

private static SshShell newSsh(String host, String user, String pwd, int port) {
SshShell s = new SshShell();
s.setHostname(host);
Expand Down
29 changes: 29 additions & 0 deletions test/src/test/java/org/zstack/test/kvm/KVMHostUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,33 @@ public void zstac84446_deployUnionContainsAllDetected() {
}
Assert.assertTrue("deploy must include extra", deployIps.contains("10.0.0.7"));
}

// ----- shouldForceTlsRedeploy coverage (ZSTAC-84446) -----

/** needDeployTlsCert=false short-circuits regardless of other flags. */
@Test
public void shouldForceTlsRedeploy_noNeedNeverForces() {
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(false, true, true));
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(false, true, false));
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(false, false, true));
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(false, false, false));
}

/** Operator opted in: force-run allowed on reconnect. */
@Test
public void shouldForceTlsRedeploy_allowRestartForces() {
Assert.assertTrue(KVMHostUtils.shouldForceTlsRedeploy(true, true, false));
}

/** First-add: full-deploy will start libvirtd, so force-run is safe. */
@Test
public void shouldForceTlsRedeploy_newAddedForces() {
Assert.assertTrue(KVMHostUtils.shouldForceTlsRedeploy(true, false, true));
}

/** Reconnect + toggle off: must NOT force-run (would change kvmagent PID). */
@Test
public void shouldForceTlsRedeploy_reconnectWithoutRestartSkips() {
Assert.assertFalse(KVMHostUtils.shouldForceTlsRedeploy(true, false, false));
}
}