Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
import java.io.InputStreamReader;
import java.lang.management.ManagementFactory;
import java.nio.file.FileStore;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
Expand All @@ -60,7 +62,8 @@

static final String SYSTEM = "system";
private final com.sun.management.OperatingSystemMXBean osMxBean;
private Set<FileStore> fileStores = new HashSet<>();
private volatile Set<FileStore> fileStores = new HashSet<>();

Check warning on line 65 in iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/system/SystemMetrics.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use a thread-safe type; adding "volatile" is not enough to make this field thread-safe.

See more on https://sonarcloud.io/project/issues?id=apache_iotdb&issues=AZ66_oUvA5hZo2luh_d6&open=AZ66_oUvA5hZo2luh_d6&pullRequest=17931
private volatile List<String> diskDirs = Collections.emptyList();

Check warning on line 66 in iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/system/SystemMetrics.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use a thread-safe type; adding "volatile" is not enough to make this field thread-safe.

See more on https://sonarcloud.io/project/issues?id=apache_iotdb&issues=AZ66_oUvA5hZo2luh_d7&open=AZ66_oUvA5hZo2luh_d7&pullRequest=17931
private static final String FAILED_TO_STATISTIC = "Failed to statistic the size of {}, because";

public SystemMetrics() {
Expand All @@ -72,6 +75,7 @@
.getMetricConfig()
.getMetricLevel()
.equals(MetricLevel.OFF)) {
this.diskDirs = new ArrayList<>(diskDirs);
this.fileStores = getFileStores(diskDirs);
}
}
Expand Down Expand Up @@ -334,39 +338,62 @@
}

public long getSystemDiskTotalSpace() {
long sysTotalSpace = 0L;
for (FileStore fileStore : fileStores) {
try {
sysTotalSpace += fileStore.getTotalSpace();
} catch (IOException e) {
logger.error(FAILED_TO_STATISTIC, fileStore, e);
}
}
return sysTotalSpace;
return collectDiskSpace(FileStore::getTotalSpace);
}

public long getSystemDiskFreeSpace() {
long sysFreeSpace = 0L;
for (FileStore fileStore : fileStores) {
try {
sysFreeSpace += fileStore.getUnallocatedSpace();
} catch (IOException e) {
logger.error(FAILED_TO_STATISTIC, fileStore, e);
}
}
return sysFreeSpace;
return collectDiskSpace(FileStore::getUnallocatedSpace);
}

public long getSystemDiskAvailableSpace() {
long sysAvailableSpace = 0L;
for (FileStore fileStore : fileStores) {
try {
sysAvailableSpace += fileStore.getUsableSpace();
} catch (IOException e) {
logger.error(FAILED_TO_STATISTIC, fileStore, e);
return collectDiskSpace(FileStore::getUsableSpace);
}

/**
* Sum up a disk-space metric across all cached {@link FileStore}s.
*
* <p>A cached {@code FileStore} pins the exact path it was resolved from. That path can be
* removed while IoTDB is running (for example an empty data region directory deleted during
* region migration), after which every space query against the stale {@code FileStore} throws
* {@link java.nio.file.NoSuchFileException}. When that happens we re-resolve the {@code
* FileStore}s once: {@link org.apache.iotdb.metrics.utils.FileStoreUtils#getFileStore} walks up
* to an existing ancestor directory on the same device, so the metric recovers on the next
* sampling instead of flooding the log with errors on every heartbeat.
*/
private long collectDiskSpace(DiskSpaceReader reader) {
boolean refreshed = false;
while (true) {
Set<FileStore> currentFileStores = fileStores;
long space = 0L;
boolean stale = false;
for (FileStore fileStore : currentFileStores) {
try {
space += reader.read(fileStore);
} catch (IOException e) {
stale = true;
if (refreshed) {

Check warning on line 374 in iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/system/SystemMetrics.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Change this condition so that it does not always evaluate to "false"

See more on https://sonarcloud.io/project/issues?id=apache_iotdb&issues=AZ66_oUuA5hZo2luh_d4&open=AZ66_oUuA5hZo2luh_d4&pullRequest=17931
// Still failing after re-resolving: log once at warn level (instead of error on every
// sampling) and skip this file store to keep the metric best-effort.
logger.warn(FAILED_TO_STATISTIC, fileStore, e);
}
}
}
if (!stale || refreshed) {

Check warning on line 381 in iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/system/SystemMetrics.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this expression which always evaluates to "false"

See more on https://sonarcloud.io/project/issues?id=apache_iotdb&issues=AZ66_oUvA5hZo2luh_d5&open=AZ66_oUvA5hZo2luh_d5&pullRequest=17931
return space;
}
refreshFileStores();
refreshed = true;
}
return sysAvailableSpace;
}

/** Re-resolve the cached {@link FileStore}s from the configured disk dirs. */

Check warning on line 389 in iotdb-core/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/system/SystemMetrics.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Single-line Javadoc comment should be multi-line.

See more on https://sonarcloud.io/project/issues?id=apache_iotdb&issues=AZ66_oUvA5hZo2luh_d8&open=AZ66_oUvA5hZo2luh_d8&pullRequest=17931
private synchronized void refreshFileStores() {
this.fileStores = getFileStores(diskDirs);
}

@FunctionalInterface
private interface DiskSpaceReader {
long read(FileStore fileStore) throws IOException;
}

public static SystemMetrics getInstance() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iotdb.metrics.metricsets.system;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.util.Collections;

import static org.junit.Assert.assertTrue;

public class SystemMetricsTest {

private File tempDir;

@Before
public void setUp() {
tempDir = new File("target", "system-metrics-test-" + System.nanoTime());
File dataDir = new File(tempDir, "data");
assertTrue(dataDir.mkdirs());
}

@After
public void tearDown() {
if (tempDir != null) {
// Best-effort cleanup; the data subdir may already be gone after the test.
new File(tempDir, "data").delete();
tempDir.delete();
}
}

/**
* Regression test for the case where the directory backing a cached {@link
* java.nio.file.FileStore} is removed while IoTDB is running (e.g. an empty data region directory
* deleted during region migration). The disk-space metrics must recover by re-resolving the file
* stores instead of throwing / permanently returning the stale value.
*/
@Test
public void testDiskSpaceRecoversWhenBackingDirIsRemoved() {
SystemMetrics systemMetrics = new SystemMetrics();
File dataDir = new File(tempDir, "data");
systemMetrics.setDiskDirs(Collections.singletonList(dataDir.getAbsolutePath()));

// Sanity check: the file store resolves to a real device with a positive size.
assertTrue(systemMetrics.getSystemDiskTotalSpace() > 0L);
assertTrue(systemMetrics.getSystemDiskFreeSpace() > 0L);
assertTrue(systemMetrics.getSystemDiskAvailableSpace() > 0L);

// The directory the FileStore was pinned to is removed; its parent still lives on the same
// device. The metrics should re-resolve the file store and keep reporting a positive size
// rather than flooding the log with NoSuchFileException.
assertTrue(dataDir.delete());

assertTrue(systemMetrics.getSystemDiskTotalSpace() > 0L);
assertTrue(systemMetrics.getSystemDiskFreeSpace() > 0L);
assertTrue(systemMetrics.getSystemDiskAvailableSpace() > 0L);
}
}
Loading