From 2e35f418aea339fca90f3de9b0ef84785e4e08f5 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Thu, 16 Oct 2025 13:26:30 +0200 Subject: [PATCH 1/8] Add maintenance as avail state for maintenance reservations --- reframe/core/schedulers/slurm.py | 39 ++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index fc7ff8543c..e28a257c41 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -147,6 +147,7 @@ def __init__(self): self._sched_access_in_submit = self.get_option( 'sched_access_in_submit' ) + self.addl_avail_states = set() def make_job(self, *args, **kwargs): return _SlurmJob(*args, **kwargs) @@ -323,7 +324,7 @@ def allnodes(self): 'could not retrieve node information') from e node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions) + return _create_nodes(node_descriptions, self.addl_avail_states) def _get_default_partition(self): completed = _run_strict('scontrol -a show -o partitions') @@ -436,15 +437,23 @@ def _get_reservation_nodes(self, reservation): raise JobSchedulerError("could not extract the node names for " "reservation '%s'" % reservation) + flags_match = re.search(r'Flags=(\S+)', completed.stdout) + if flags_match: + if 'MAINT' in flags_match[1].split(','): + self.addl_avail_states.add('MAINTENANCE') + # else: + # raise JobSchedulerError(f"could not extract the reservation " + # f"flags for reservation '{reservation}'") + completed = _run_strict('scontrol -a show -o %s' % reservation_nodes) node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions) + return _create_nodes(node_descriptions, self.addl_avail_states) def _get_nodes_by_name(self, nodespec): completed = osext.run_command('scontrol -a show -o node %s' % nodespec) node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions) + return 
_create_nodes(node_descriptions, self.addl_avail_states) def _update_completion_time(self, job, timestamps): if job._completion_time is not None: @@ -691,11 +700,11 @@ def poll(self, *jobs): self._cancel_if_pending_too_long(job) -def _create_nodes(descriptions): +def _create_nodes(descriptions, addl_avail_states=None): nodes = set() for descr in descriptions: with suppress(JobSchedulerError): - nodes.add(_SlurmNode(descr)) + nodes.add(_SlurmNode(descr, addl_avail_states=addl_avail_states)) return nodes @@ -703,7 +712,7 @@ def _create_nodes(descriptions): class _SlurmNode(sched.Node): '''Class representing a Slurm node.''' - def __init__(self, node_descr): + def __init__(self, node_descr, addl_avail_states=None): self._name = self._extract_attribute('NodeName', node_descr) if not self._name: raise JobSchedulerError( @@ -718,6 +727,15 @@ def __init__(self, node_descr): 'State', node_descr, sep='+') or set() self._descr = node_descr + self.addl_avail_states = addl_avail_states or set() + self.available_states = { + 'ALLOCATED', + 'COMPLETING', + 'IDLE', + 'PLANNED', + 'RESERVED' + } | self.addl_avail_states + def __eq__(self, other): if not isinstance(other, type(self)): return NotImplemented @@ -735,14 +753,7 @@ def in_statex(self, state): return self._states == set(state.upper().split('+')) def is_avail(self): - available_states = { - 'ALLOCATED', - 'COMPLETING', - 'IDLE', - 'PLANNED', - 'RESERVED' - } - return self._states <= available_states + return self._states <= self.available_states def is_down(self): return not self.is_avail() From c7e38c4309ff91f3a707fffe292494ddc676208d Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Mon, 24 Nov 2025 06:17:53 +0100 Subject: [PATCH 2/8] Add log --- reframe/core/schedulers/slurm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index e28a257c41..a369314010 100644 --- a/reframe/core/schedulers/slurm.py +++ 
b/reframe/core/schedulers/slurm.py @@ -441,9 +441,9 @@ def _get_reservation_nodes(self, reservation): if flags_match: if 'MAINT' in flags_match[1].split(','): self.addl_avail_states.add('MAINTENANCE') - # else: - # raise JobSchedulerError(f"could not extract the reservation " - # f"flags for reservation '{reservation}'") + else: + self.log(f"could not extract the reservation flags for " + f"reservation '{reservation}'") completed = _run_strict('scontrol -a show -o %s' % reservation_nodes) node_descriptions = completed.stdout.splitlines() From 24aa49a995348247e41c466a1c60df68b837456f Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Mon, 24 Nov 2025 09:19:05 +0100 Subject: [PATCH 3/8] Move is_avail and is_down functions in scheduler --- reframe/core/schedulers/__init__.py | 37 +++++++++--------------- reframe/core/schedulers/slurm.py | 45 ++++++++++++++--------------- reframe/frontend/testgenerators.py | 6 +++- unittests/test_schedulers.py | 11 ++++--- 4 files changed, 47 insertions(+), 52 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index 5892700785..cfe635c48e 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -153,7 +153,7 @@ def log(self, message, level=DEBUG2): getlogger().log(level, f'[S] {self.registered_name}: {message}') -def filter_nodes_by_state(nodelist, state): +def filter_nodes_by_state(nodelist, state, scheduler): '''Filter nodes by their state :arg nodelist: List of :class:`Node` instances to filter. 
@@ -178,11 +178,13 @@ def filter_nodes_by_state(nodelist, state): allowed_states = state.split('|') final_nodelist = set() for s in allowed_states: - final_nodelist.update(filter_nodes_by_state(nodelist, s)) + final_nodelist.update( + filter_nodes_by_state(nodelist, s, scheduler) + ) nodelist = final_nodelist elif state == 'avail': - nodelist = {n for n in nodelist if n.is_avail()} + nodelist = {n for n in nodelist if scheduler.is_node_avail(n)} elif state != 'all': if state.endswith('*'): # non-exclusive state match @@ -618,19 +620,22 @@ def guess_num_tasks(self): f'[F] Total available nodes: {len(available_nodes)}' ) + available_nodes = self.scheduler.filternodes(self, available_nodes) + getlogger().debug( + f'[F] Total available after scheduler filter: ' + f'{len(available_nodes)}' + ) + # Try to guess the number of tasks now available_nodes = filter_nodes_by_state( - available_nodes, self.sched_flex_alloc_nodes.lower() + available_nodes, + self.sched_flex_alloc_nodes.lower(), + self.scheduler ) getlogger().debug( f'[F] Total available in state=' f'{self.sched_flex_alloc_nodes.lower()}: {len(available_nodes)}' ) - available_nodes = self.scheduler.filternodes(self, available_nodes) - getlogger().debug( - f'[F] Total available after scheduler filter: ' - f'{len(available_nodes)}' - ) return len(available_nodes) * num_tasks_per_node def submit(self): @@ -694,17 +699,6 @@ def in_state(self, state): :class:`False` otherwise. ''' - @abc.abstractmethod - def is_avail(self): - '''Check whether the node is available for scheduling jobs.''' - - def is_down(self): - '''Check whether node is down. - - This is the inverse of :func:`is_avail`. 
- ''' - return not self.is_avail() - class AlwaysIdleNode(Node): def __init__(self, name): @@ -715,9 +709,6 @@ def __init__(self, name): def name(self): return self._name - def is_avail(self): - return True - def in_statex(self, state): return state.lower() == self._state diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index a369314010..bd8514dedd 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -147,7 +147,13 @@ def __init__(self): self._sched_access_in_submit = self.get_option( 'sched_access_in_submit' ) - self.addl_avail_states = set() + self.node_available_states = { + 'ALLOCATED', + 'COMPLETING', + 'IDLE', + 'PLANNED', + 'RESERVED' + } def make_job(self, *args, **kwargs): return _SlurmJob(*args, **kwargs) @@ -324,7 +330,7 @@ def allnodes(self): 'could not retrieve node information') from e node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions, self.addl_avail_states) + return _create_nodes(node_descriptions) def _get_default_partition(self): completed = _run_strict('scontrol -a show -o partitions') @@ -440,20 +446,20 @@ def _get_reservation_nodes(self, reservation): flags_match = re.search(r'Flags=(\S+)', completed.stdout) if flags_match: if 'MAINT' in flags_match[1].split(','): - self.addl_avail_states.add('MAINTENANCE') + self.node_available_states.add('MAINTENANCE') else: self.log(f"could not extract the reservation flags for " f"reservation '{reservation}'") completed = _run_strict('scontrol -a show -o %s' % reservation_nodes) node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions, self.addl_avail_states) + return _create_nodes(node_descriptions) def _get_nodes_by_name(self, nodespec): completed = osext.run_command('scontrol -a show -o node %s' % nodespec) node_descriptions = completed.stdout.splitlines() - return _create_nodes(node_descriptions, self.addl_avail_states) + return _create_nodes(node_descriptions) 
def _update_completion_time(self, job, timestamps): if job._completion_time is not None: @@ -603,7 +609,7 @@ def _do_cancel_if_blocked(self, job, reason_descr): self.log(f'Checking if nodes {node_names!r} ' f'are indeed unavailable') nodes = self._get_nodes_by_name(node_names) - if not any(n.is_down() for n in nodes): + if not any(self.is_node_down(n) for n in nodes): return self.cancel(job) @@ -639,6 +645,12 @@ def cancel(self, job): def finished(self, job): return slurm_state_completed(job.state) + def is_node_avail(self, node): + return node.states <= self.node_available_states + + def is_node_down(self, node): + return not self.is_node_avail(node) + @register_scheduler('squeue') class SqueueJobScheduler(SlurmJobScheduler): @@ -700,11 +712,11 @@ def poll(self, *jobs): self._cancel_if_pending_too_long(job) -def _create_nodes(descriptions, addl_avail_states=None): +def _create_nodes(descriptions): nodes = set() for descr in descriptions: with suppress(JobSchedulerError): - nodes.add(_SlurmNode(descr, addl_avail_states=addl_avail_states)) + nodes.add(_SlurmNode(descr)) return nodes @@ -712,7 +724,7 @@ def _create_nodes(descriptions, addl_avail_states=None): class _SlurmNode(sched.Node): '''Class representing a Slurm node.''' - def __init__(self, node_descr, addl_avail_states=None): + def __init__(self, node_descr): self._name = self._extract_attribute('NodeName', node_descr) if not self._name: raise JobSchedulerError( @@ -727,15 +739,6 @@ def __init__(self, node_descr, addl_avail_states=None): 'State', node_descr, sep='+') or set() self._descr = node_descr - self.addl_avail_states = addl_avail_states or set() - self.available_states = { - 'ALLOCATED', - 'COMPLETING', - 'IDLE', - 'PLANNED', - 'RESERVED' - } | self.addl_avail_states - def __eq__(self, other): if not isinstance(other, type(self)): return NotImplemented @@ -752,12 +755,6 @@ def in_state(self, state): def in_statex(self, state): return self._states == set(state.upper().split('+')) - def is_avail(self): 
- return self._states <= self.available_states - - def is_down(self): - return not self.is_avail() - def satisfies(self, slurm_constraint): # Convert the Slurm constraint to a Python expression and evaluate it, # but restrict our syntax to accept only AND or OR constraints and diff --git a/reframe/frontend/testgenerators.py b/reframe/frontend/testgenerators.py index f7da28aaee..27f36f86c9 100644 --- a/reframe/frontend/testgenerators.py +++ b/reframe/frontend/testgenerators.py @@ -38,7 +38,11 @@ def getallnodes(state, jobs_cli_options=None): f'Total available nodes for {part.name}: {len(available_nodes)}' ) - available_nodes = filter_nodes_by_state(available_nodes, state) + available_nodes = filter_nodes_by_state( + available_nodes, + state, + part.scheduler + ) nodes[part.fullname] = [n.name for n in available_nodes] return nodes diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 9a14da9d6c..1ebc3bfbc3 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -41,6 +41,8 @@ def slurm_only(scheduler): if scheduler.registered_name not in ('slurm', 'squeue'): pytest.skip('test is relevant only for Slurm backends') + return scheduler + @pytest.fixture def local_only(scheduler): @@ -1455,7 +1457,8 @@ def test_slurm_node_in_state(slurm_node_allocated, def test_slurm_node_is_down(slurm_node_allocated, slurm_node_idle, - slurm_node_nopart): - assert not slurm_node_allocated.is_down() - assert not slurm_node_idle.is_down() - assert slurm_node_nopart.is_down() + slurm_node_nopart, + slurm_only): + assert not slurm_only().is_node_down(slurm_node_allocated) + assert not slurm_only().is_node_down(slurm_node_idle) + assert slurm_only().is_node_down(slurm_node_nopart) \ No newline at end of file From 0ff3fa4112fa4917e12023a91f8a884fac275f61 Mon Sep 17 00:00:00 2001 From: Eirini Koutsaniti Date: Mon, 24 Nov 2025 09:20:34 +0100 Subject: [PATCH 4/8] Small fixes --- reframe/core/schedulers/slurm.py | 2 +- unittests/test_schedulers.py 
| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index bd8514dedd..fef29202b8 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -445,7 +445,7 @@ def _get_reservation_nodes(self, reservation): flags_match = re.search(r'Flags=(\S+)', completed.stdout) if flags_match: - if 'MAINT' in flags_match[1].split(','): + if 'MAINT' in flags_match.group(1).split(','): self.node_available_states.add('MAINTENANCE') else: self.log(f"could not extract the reservation flags for " diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 1ebc3bfbc3..38de11ecb0 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1461,4 +1461,4 @@ def test_slurm_node_is_down(slurm_node_allocated, slurm_only): assert not slurm_only().is_node_down(slurm_node_allocated) assert not slurm_only().is_node_down(slurm_node_idle) - assert slurm_only().is_node_down(slurm_node_nopart) \ No newline at end of file + assert slurm_only().is_node_down(slurm_node_nopart) From a075bd4394e80c529b831eb9ebebb4cad9d3cdc2 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 27 Nov 2025 18:35:02 +0100 Subject: [PATCH 5/8] Extend and enhance the unit tests for Slurm available nodes Also: - Normalize syntax of `scontrol` commands --- reframe/core/schedulers/slurm.py | 23 +-- unittests/test_schedulers.py | 265 ++++++++++++++++++++++--------- 2 files changed, 200 insertions(+), 88 deletions(-) diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index fef29202b8..06c233eaa5 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -434,29 +434,31 @@ def filternodes(self, job, nodes): return nodes - def _get_reservation_nodes(self, reservation): - completed = _run_strict('scontrol -a show res %s' % reservation) - node_match = re.search(r'(Nodes=\S+)', completed.stdout) + def 
_get_reservation_nodes(self, resv): + completed = _run_strict(f'scontrol -a show -o reservations {resv}') + node_match = re.search(r'Nodes=(\S+)', completed.stdout) if node_match: reservation_nodes = node_match[1] else: - raise JobSchedulerError("could not extract the node names for " - "reservation '%s'" % reservation) + raise JobSchedulerError('could not extract the node names for ' + f'reservation {resv!r}') flags_match = re.search(r'Flags=(\S+)', completed.stdout) if flags_match: if 'MAINT' in flags_match.group(1).split(','): self.node_available_states.add('MAINTENANCE') else: - self.log(f"could not extract the reservation flags for " - f"reservation '{reservation}'") + self.log('could not extract the reservation flags for ' + f'reservation {resv!r}') - completed = _run_strict('scontrol -a show -o %s' % reservation_nodes) + completed = _run_strict( + f'scontrol -a show -o nodes {reservation_nodes}' + ) node_descriptions = completed.stdout.splitlines() return _create_nodes(node_descriptions) def _get_nodes_by_name(self, nodespec): - completed = osext.run_command('scontrol -a show -o node %s' % + completed = osext.run_command('scontrol -a show -o nodes %s' % nodespec) node_descriptions = completed.stdout.splitlines() return _create_nodes(node_descriptions) @@ -748,6 +750,9 @@ def __eq__(self, other): def __hash__(self): return hash(self.name) + def __repr__(self): + return f'_SlurmNode({self.name!r})' + def in_state(self, state): return all([self._states >= set(state.upper().split('+')), self._partitions, self._active_features, self._states]) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 38de11ecb0..8fe9e8ff8c 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -20,6 +20,7 @@ ) from reframe.core.schedulers import Job from reframe.core.schedulers.slurm import _SlurmNode, _create_nodes +from reframe.utility import nodelist_expand @pytest.fixture @@ -868,15 +869,18 @@ def 
test_cancel_term_ignore(minimal_job, scheduler, local_only): @pytest.fixture -def slurm_nodes(): +def slurm_nodes(tmp_path): '''Dummy Slurm node descriptions''' - return ['NodeName=nid00001 Arch=x86_64 CoresPerSocket=12 ' + + nodemap = { + 'nid0001': ( + 'NodeName=nid0001 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' - 'Sockets=1 Boards=1 State=MAINT+DRAIN ' + 'Sockets=1 Boards=1 State=MAINTENANCE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' 'MCS_label=N/A Partitions=p1,p2,pdef ' 'BootTime=01 Jan 2018 ' @@ -886,15 +890,16 @@ def slurm_nodes(): 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' 'ExtSensorsTemp=n/s Reason=Foo/ ' - 'failed [reframe_user@01 Jan 2018]', - - 'NodeName=nid00002 Arch=x86_64 CoresPerSocket=12 ' + 'failed [reframe_user@01 Jan 2018]' + ), + 'nid0002': ( + 'NodeName=nid0002 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f2,f3 ActiveFeatures=f2,f3 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00002 ' - 'NodeHostName=nid00002 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0002 ' + 'NodeHostName=nid0002 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' - 'Sockets=1 Boards=1 State=MAINT+DRAIN ' + 'Sockets=1 Boards=1 State=MAINTENANCE+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' 'MCS_label=N/A Partitions=p2,p3,pdef ' 'BootTime=01 Jan 2018 ' @@ -904,15 +909,15 @@ def slurm_nodes(): 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' 'ExtSensorsTemp=n/s Reason=Foo/ ' - 'failed [reframe_user@01 Jan 2018]', - - 'Node invalid_node1 not found', - - 'NodeName=nid00003 
Arch=x86_64 CoresPerSocket=12 ' + 'failed [reframe_user@01 Jan 2018]' + ), + 'invalid_node1': 'Node invalid_node1 not found', + 'nid0003': ( + 'NodeName=nid0003 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f3 ActiveFeatures=f1,f3 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00003' - 'NodeHostName=nid00003 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0003' + 'NodeHostName=nid0003 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -924,13 +929,14 @@ def slurm_nodes(): 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' 'ExtSensorsTemp=n/s Reason=Foo/ ' - 'failed [reframe_user@01 Jan 2018]', - - 'NodeName=nid00004 Arch=x86_64 CoresPerSocket=12 ' + 'failed [reframe_user@01 Jan 2018]' + ), + 'nid0004': ( + 'NodeName=nid0004 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f4 ActiveFeatures=f1,f4 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00004' - 'NodeHostName=nid00004 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0004' + 'NodeHostName=nid0004 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -941,13 +947,14 @@ def slurm_nodes(): 'AllocTRES= CapWatts=n/a CurrentWatts=100 ' 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' - 'ExtSensorsTemp=n/s Reason=Foo/ ', - - 'NodeName=nid00005 Arch=x86_64 CoresPerSocket=12 ' + 'ExtSensorsTemp=n/s Reason=Foo/ ' + ), + 'nid0005': ( + 'NodeName=nid0005 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f5 ActiveFeatures=f5 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00005' - 'NodeHostName=nid00005 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0005' + 
'NodeHostName=nid0005 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=ALLOCATED ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -959,15 +966,16 @@ def slurm_nodes(): 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' 'ExtSensorsTemp=n/s Reason=Foo/ ' - 'failed [reframe_user@01 Jan 2018]', - - 'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 ' + 'failed [reframe_user@01 Jan 2018]' + ), + 'nid0006': ( + 'NodeName=nid0006 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f6 ActiveFeatures=f6 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006' - 'NodeHostName=nid00006 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0006' + 'NodeHostName=nid0006 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' - 'Sockets=1 Boards=1 State=MAINT ' + 'Sockets=1 Boards=1 State=MAINTENANCE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' 'MCS_label=N/A Partitions=p4 ' 'BootTime=01 Jan 2018 ' @@ -977,30 +985,106 @@ def slurm_nodes(): 'LowestJoules=100000000 ConsumedJoules=0 ' 'ExtSensorsJoules=n/s ExtSensorsWatts=0 ' 'ExtSensorsTemp=n/s Reason=Foo/ ' - 'failed [reframe_user@01 Jan 2018]', + 'failed [reframe_user@01 Jan 2018]' + ), + 'invalid_node2': 'Node invalid_node2 not found' + } + + def _dump_nodes(nodelist): + if nodelist is None: + nodelist = list(nodemap.keys()) + else: + nodelist = nodelist_expand(nodelist) - 'Node invalid_node2 not found'] + nodes_file = tmp_path / 'nodes.txt' + with open(nodes_file, 'w') as fp: + for node in nodelist: + fp.write(f'{nodemap[node]}\n') + + return nodes_file + + return _dump_nodes @pytest.fixture -def slurm_scheduler_patched(slurm_nodes): - ret = getscheduler('slurm')() - ret.allnodes = lambda: _create_nodes(slurm_nodes) - ret._get_default_partition = lambda: 'pdef' - ret._get_reservation_nodes = lambda res: { - n for n in ret.allnodes() if n.name != 'nid00001' - } - 
ret._get_nodes_by_name = lambda name: { - n for n in ret.allnodes() if n.name == name - } - return ret +def slurm_reservation(tmp_path): + resv_file = tmp_path / 'resv.txt' + with open(resv_file, 'w') as fp: + fp.write('ReservationName=dummy StartTime=2018-01-01T00:00:00 ' + 'EndTime=2019-01-01T00:00:00 Duration=365-00:00:00 ' + 'Nodes=nid[0001-0002,0006] NodeCnt=3 CoreCnt=72 ' + 'Features=(null) PartitionName=(null) ' + 'Flags=MAINT,IGNORE_JOBS,SPEC_NODES TRES=cpu=72 ' + 'Users=(null) Groups=(null) Accounts=admin Licenses=(null) ' + 'State=ACTIVE BurstBuffer=(null) MaxStartDelay=(null)\n') + + return resv_file + + +@pytest.fixture +def slurm_partitions(tmp_path): + def _gen_partition(name, nodelist, default=False): + nodes = nodelist_expand(nodelist) + ans = 'YES' if default else 'NO' + return (f'PartitionName={name} AllowGroups=ALL AllowAccounts=ALL ' + f'AllowQos=ALL AllocNodes=ALL Default={ans} QoS=partition ' + 'DefaultTime=02:00:00 DisableRootJobs=NO ExclusiveUser=NO ' + 'ExclusiveTopo=NO GraceTime=0 Hidden=NO MaxNodes=8 ' + 'MaxTime=1-00:00:00 MinNodes=0 LLN=NO ' + 'MaxCPUsPerNode=UNLIMITED MaxCPUsPerSocket=UNLIMITED ' + f'NodeSets=nid-nodeset Nodes={nodelist} PriorityJobFactor=1 ' + 'PriorityTier=1 RootOnly=NO ReqResv=NO ' + 'OverSubscribe=EXCLUSIVE OverTimeLimit=NONE PreemptMode=OFF ' + f'State=UP TotalCPUs={24*len(nodes)} TotalNodes={len(nodes)} ' + 'SelectTypeParameters=NONE JobDefaults=(null) ' + 'DefMemPerNode=UNLIMITED MaxMemPerNode=UNLIMITED ' + f'TRES=cpu={24*len(nodes)},mem={32220*len(nodes)}M,' + f'node={len(nodes)},billing={len(nodes)} ' + 'TRESBillingWeights=Node=1\n') + + part_file = tmp_path / 'partitions.txt' + with open(part_file, 'w') as fp: + fp.write(_gen_partition('p1', 'nid[0001,0003-0004,0005]')) + fp.write(_gen_partition('p2', 'nid[0001-0002]')) + fp.write(_gen_partition('p3', 'nid[0002-0005]')) + fp.write(_gen_partition('p4', 'nid0006')) + fp.write(_gen_partition('pdef', 'nid[0001-0004]', default=True)) + + return part_file 
@pytest.fixture -def make_flexible_job(slurm_scheduler_patched, tmp_path): +def slurm_commands(monkeypatch, slurm_nodes, + slurm_reservation, slurm_partitions): + run_command = osext.run_command + + def _run_command_patched(cmd, *args, **kwargs): + print(f'{cmd}') + node_match = re.match(r'scontrol -a show -o nodes (\S+)', cmd) + if cmd == 'scontrol -a show -o reservations dummy': + cmd = f'cat {slurm_reservation}' + elif cmd == 'scontrol -a show -o nodes': + cmd = f'cat {slurm_nodes(None)}' + elif node_match: + cmd = f'cat {slurm_nodes(node_match.group(1))}' + elif cmd == 'scontrol -a show -o partitions': + cmd = f'cat {slurm_partitions}' + + print(f'-> {cmd}') + return run_command(cmd, *args, **kwargs) + + import functools + import reframe.core.schedulers.slurm as slurm + monkeypatch.setattr(osext, 'run_command', _run_command_patched) + monkeypatch.setattr(slurm, '_run_strict', + functools.partial(_run_command_patched, check=True)) + + +@pytest.fixture +def make_flexible_job(tmp_path, slurm_commands): def _make_flexible_job(flex_type, **jobargs): ret = Job.create( - slurm_scheduler_patched, getlauncher('local')(), + getscheduler('slurm')(), getlauncher('local')(), name='testjob', workdir=tmp_path, script_filename=str(tmp_path / 'job.sh'), @@ -1153,27 +1237,27 @@ def test_flex_alloc_valid_reservation_cmd(make_flexible_job): sched_options=['--reservation=dummy']) prepare_job(job) - assert job.num_tasks == 4 + assert job.num_tasks == 8 def test_flex_alloc_valid_reservation_option(make_flexible_job): job = make_flexible_job('all', sched_access=['--constraint=f2']) job.options = ['--reservation=dummy'] prepare_job(job) - assert job.num_tasks == 4 + assert job.num_tasks == 8 def test_flex_alloc_exclude_nodes_cmd(make_flexible_job): job = make_flexible_job('all', sched_access=['--constraint=f1'], - sched_options=['--exclude=nid00001']) + sched_options=['--exclude=nid0001']) prepare_job(job) assert job.num_tasks == 8 def 
test_flex_alloc_exclude_nodes_opt(make_flexible_job): job = make_flexible_job('all', sched_access=['--constraint=f1']) - job.options = ['-x nid00001'] + job.options = ['-x nid0001'] prepare_job(job) assert job.num_tasks == 8 @@ -1199,7 +1283,7 @@ def test_flex_alloc_not_enough_idle_nodes(make_flexible_job, strict_flex): def test_flex_alloc_maintenance_nodes(make_flexible_job): - job = make_flexible_job('maint') + job = make_flexible_job('maintenance') job.options = ['--partition=p4'] prepare_job(job) assert job.num_tasks == 4 @@ -1255,33 +1339,56 @@ def test_flex_alloc_alloc_state_OR(make_flexible_job): prepare_job(job) assert job.num_tasks == 12 - job = make_flexible_job('maint*|idle') + job = make_flexible_job('maintenance*|idle') prepare_job(job) assert job.num_tasks == 16 - job = make_flexible_job('maint|avail') + job = make_flexible_job('maintenance|avail') job.options = ['--partition=p1'] prepare_job(job) - assert job.num_tasks == 12 + assert job.num_tasks == 16 job = make_flexible_job('all|idle') prepare_job(job) assert job.num_tasks == 16 - job = make_flexible_job('allocated|idle|maint') + job = make_flexible_job('allocated|idle|maintenance') job.options = ['--partition=p1'] prepare_job(job) + assert job.num_tasks == 16 + + +def test_flex_alloc_avail(make_flexible_job): + job = make_flexible_job('avail') + prepare_job(job) + assert job.num_tasks == 8 + + job = make_flexible_job('avail') + job.options = ['--partition=p3'] + prepare_job(job) assert job.num_tasks == 12 + # `MAINTENANCE` state is treated as available for reservations with + # Flags=MAINT + job = make_flexible_job('avail') + job.options = ['--reservation=dummy'] + prepare_job(job) + assert job.num_tasks == 4 + + job = make_flexible_job('avail') + job.options = ['--reservation=dummy', '--partition=p4'] + prepare_job(job) + assert job.num_tasks == 4 + @pytest.fixture def slurm_node_allocated(): return _SlurmNode( - 'NodeName=nid00001 Arch=x86_64 CoresPerSocket=12 ' + 'NodeName=nid0001 Arch=x86_64 
CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=ALLOCATED ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -1300,11 +1407,11 @@ def slurm_node_allocated(): @pytest.fixture def slurm_node_idle(): return _SlurmNode( - 'NodeName=nid00002 Arch=x86_64 CoresPerSocket=12 ' + 'NodeName=nid0002 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -1323,11 +1430,11 @@ def slurm_node_idle(): @pytest.fixture def slurm_node_drained(): return _SlurmNode( - 'NodeName=nid00003 Arch=x86_64 CoresPerSocket=12 ' + 'NodeName=nid0003 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -1346,11 +1453,11 @@ def slurm_node_drained(): @pytest.fixture def slurm_node_nopart(): return _SlurmNode( - 'NodeName=nid00004 Arch=x86_64 CoresPerSocket=12 ' + 'NodeName=nid0004 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 
ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -1368,13 +1475,13 @@ def slurm_node_nopart(): @pytest.fixture def slurm_node_maintenance(): return _SlurmNode( - 'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 ' + 'NodeName=nid0006 Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f6 ActiveFeatures=f6 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006' - 'NodeHostName=nid00006 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0006' + 'NodeHostName=nid0006 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' - 'Sockets=1 Boards=1 State=MAINT ' + 'Sockets=1 Boards=1 State=MAINTENANCE ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' 'MCS_label=N/A Partitions=p4 ' 'BootTime=01 Jan 2018 ' @@ -1394,8 +1501,8 @@ def test_slurm_node_noname(): 'Arch=x86_64 CoresPerSocket=12 ' 'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 ' 'AvailableFeatures=f1,f2 ActiveFeatures=f1,f2 ' - 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00001 ' - 'NodeHostName=nid00001 Version=10.00 OS=Linux ' + 'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid0001 ' + 'NodeHostName=nid0001 Version=10.00 OS=Linux ' 'RealMemory=32220 AllocMem=0 FreeMem=10000 ' 'Sockets=1 Boards=1 State=IDLE+DRAIN ' 'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A ' @@ -1425,10 +1532,10 @@ def test_slurm_node_equals(slurm_node_allocated, slurm_node_idle): def test_slurm_node_attributes(slurm_node_allocated, slurm_node_nopart): - assert slurm_node_allocated.name == 'nid00001' + assert slurm_node_allocated.name == 'nid0001' assert slurm_node_allocated.partitions == {'p1', 'p2'} assert slurm_node_allocated.active_features == {'f1', 'f2'} - assert slurm_node_nopart.name == 
'nid00004' + assert slurm_node_nopart.name == 'nid0004' assert slurm_node_nopart.partitions == set() assert slurm_node_nopart.active_features == {'f1', 'f2'} @@ -1439,7 +1546,7 @@ def test_hash(slurm_node_allocated): def test_str(slurm_node_allocated): - assert 'nid00001' == str(slurm_node_allocated) + assert 'nid0001' == str(slurm_node_allocated) def test_slurm_node_in_state(slurm_node_allocated, From e57468345b4c488e88d2959d3a7f6062d25fa51f Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 28 Nov 2025 13:03:55 +0100 Subject: [PATCH 6/8] Remove unused imports --- unittests/test_schedulers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 8fe9e8ff8c..c62e121bd9 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -19,7 +19,7 @@ ConfigError, JobError, JobNotStartedError, JobSchedulerError, SkipTestError ) from reframe.core.schedulers import Job -from reframe.core.schedulers.slurm import _SlurmNode, _create_nodes +from reframe.core.schedulers.slurm import _SlurmNode from reframe.utility import nodelist_expand From fdcd0bd4fc052071a781ba51dbe4e532ddf0fac8 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 28 Nov 2025 13:35:36 +0100 Subject: [PATCH 7/8] Address PR comments --- reframe/core/schedulers/__init__.py | 103 ++++++++++++++-------------- reframe/core/schedulers/slurm.py | 11 ++- reframe/frontend/testgenerators.py | 8 +-- 3 files changed, 61 insertions(+), 61 deletions(-) diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py index cfe635c48e..e66224cfe6 100644 --- a/reframe/core/schedulers/__init__.py +++ b/reframe/core/schedulers/__init__.py @@ -100,6 +100,57 @@ def filternodes(self, job, nodes): :meta private: ''' + def filternodes_by_state(self, nodelist, state): + '''Filter nodes by their state + + :arg nodelist: List of :class:`Node` instances to filter. + :arg state: The state of the nodes. 
+ If ``all``, the initial list is returned untouched. + If ``avail``, only the available nodes will be returned. + All other values are interpreted as a state string. + The pipe character ``|`` can be used to specify multiple + alternative node states. + State match is exclusive unless the ``*`` is added at the end of the + state string. + When defining multiple states using ``|``, ``*`` has to be added at + the end of each alternative state for which a non-exclusive match is + required. + + :returns: the filtered node list + + .. versionchanged:: 4.9 + + Support the ``|`` character to filter according to alternative states. + + .. versionchanged:: 4.10 + + Moved inside the :class:`JobScheduler` class. + ''' + if '|' in state: + allowed_states = state.split('|') + final_nodelist = set() + for s in allowed_states: + final_nodelist.update( + self.filternodes_by_state(nodelist, s) + ) + + nodelist = final_nodelist + elif state == 'avail': + nodelist = {n for n in nodelist if self.is_node_avail(n)} + elif state != 'all': + if state.endswith('*'): + # non-exclusive state match + state = state[:-1] + nodelist = { + n for n in nodelist if n.in_state(state) + } + else: + nodelist = { + n for n in nodelist if n.in_statex(state) + } + + return nodelist + @abc.abstractmethod def submit(self, job): '''Submit a job. @@ -153,53 +204,6 @@ def log(self, message, level=DEBUG2): getlogger().log(level, f'[S] {self.registered_name}: {message}') -def filter_nodes_by_state(nodelist, state, scheduler): - '''Filter nodes by their state - - :arg nodelist: List of :class:`Node` instances to filter. - :arg state: The state of the nodes. - If ``all``, the initial list is returned untouched. - If ``avail``, only the available nodes will be returned. - All other values are interpreted as a state string. - The pipe character ``|`` can be used as to specify multiple - alternative node states. - State match is exclusive unless the ``*`` is added at the end of the - state string.
- When defining multiple states using ``|``, ``*`` has to be added at - the end of each alternative state for which a non-exclusive match is - required. - - :returns: the filtered node list - - .. versionchanged:: 4.9 - Support the ``|`` character to filter according to alternative states. - ''' - if '|' in state: - allowed_states = state.split('|') - final_nodelist = set() - for s in allowed_states: - final_nodelist.update( - filter_nodes_by_state(nodelist, s, scheduler) - ) - - nodelist = final_nodelist - elif state == 'avail': - nodelist = {n for n in nodelist if scheduler.is_node_avail(n)} - elif state != 'all': - if state.endswith('*'): - # non-exclusive state match - state = state[:-1] - nodelist = { - n for n in nodelist if n.in_state(state) - } - else: - nodelist = { - n for n in nodelist if n.in_statex(state) - } - - return nodelist - - class Job(jsonext.JSONSerializable, metaclass=JobMeta): '''A job descriptor. @@ -627,10 +631,9 @@ def guess_num_tasks(self): ) # Try to guess the number of tasks now - available_nodes = filter_nodes_by_state( + available_nodes = self.scheduler.filternodes_by_state( available_nodes, - self.sched_flex_alloc_nodes.lower(), - self.scheduler + self.sched_flex_alloc_nodes.lower() ) getlogger().debug( f'[F] Total available in state=' diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py index 06c233eaa5..ac9ce340c1 100644 --- a/reframe/core/schedulers/slurm.py +++ b/reframe/core/schedulers/slurm.py @@ -147,7 +147,7 @@ def __init__(self): self._sched_access_in_submit = self.get_option( 'sched_access_in_submit' ) - self.node_available_states = { + self._available_states = { 'ALLOCATED', 'COMPLETING', 'IDLE', @@ -436,6 +436,8 @@ def filternodes(self, job, nodes): def _get_reservation_nodes(self, resv): completed = _run_strict(f'scontrol -a show -o reservations {resv}') + self.log(f'reservation info:\n{completed.stdout}') + node_match = re.search(r'Nodes=(\S+)', completed.stdout) if node_match: 
reservation_nodes = node_match[1] @@ -446,10 +448,7 @@ def _get_reservation_nodes(self, resv): flags_match = re.search(r'Flags=(\S+)', completed.stdout) if flags_match: if 'MAINT' in flags_match.group(1).split(','): - self.node_available_states.add('MAINTENANCE') - else: - self.log('could not extract the reservation flags for ' - f'reservation {resv!r}') + self._available_states.add('MAINTENANCE') completed = _run_strict( f'scontrol -a show -o nodes {reservation_nodes}' @@ -648,7 +647,7 @@ def finished(self, job): return slurm_state_completed(job.state) def is_node_avail(self, node): - return node.states <= self.node_available_states + return node.states <= self._available_states def is_node_down(self, node): return not self.is_node_avail(node) diff --git a/reframe/frontend/testgenerators.py b/reframe/frontend/testgenerators.py index 27f36f86c9..546a7a535c 100644 --- a/reframe/frontend/testgenerators.py +++ b/reframe/frontend/testgenerators.py @@ -14,7 +14,7 @@ from reframe.core.fields import make_convertible from reframe.core.logging import getlogger, time_function from reframe.core.meta import make_test -from reframe.core.schedulers import Job, filter_nodes_by_state +from reframe.core.schedulers import Job from reframe.frontend.executors import generate_testcases @@ -38,10 +38,8 @@ def getallnodes(state, jobs_cli_options=None): f'Total available nodes for {part.name}: {len(available_nodes)}' ) - available_nodes = filter_nodes_by_state( - available_nodes, - state, - part.scheduler + available_nodes = part.scheduler.filternodes_by_state( + available_nodes, state ) nodes[part.fullname] = [n.name for n in available_nodes] From 52d57d1491e4d7d1091ed862557bda15b4877cfc Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 28 Nov 2025 13:47:16 +0100 Subject: [PATCH 8/8] Update docs of `--distribute` option --- docs/manpage.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/manpage.rst b/docs/manpage.rst index 296eb4078f..55d3ca795d 100644 --- 
a/docs/manpage.rst +++ b/docs/manpage.rst @@ -1032,6 +1032,7 @@ The way the tests are generated and how they interact with the test filtering op - ``all``: Tests will run on all the nodes of their respective valid partitions regardless of the node state. - ``avail``: Tests will run on all the nodes of their respective valid partitions that are available for running jobs. Note that if a node is currently allocated to another job it is still considered as "available." + Also, for ReFrame partitions using the Slurm backends, if this option is used on a reservation with the ``MAINT`` flag set, then nodes in ``MAINTENANCE`` state will also be considered as available. - ``NODESTATE``: Tests will run on all the nodes of their respective valid partitions that are exclusively in state ``NODESTATE``. If ``NODESTATE`` is not specified, ``idle`` is assumed. - ``NODESTATE*``: Tests will run on all the nodes of their respective valid partitions that are at least in state ``NODESTATE``. @@ -1060,8 +1061,13 @@ The way the tests are generated and how they interact with the test filtering op To achieve the previous behaviour, you should use ``--distribute=idle*``. .. versionchanged:: 4.9 + ``--distribute=NODESTATE`` now allows you to specify multiple valid states using the ``|`` character. + .. versionchanged:: 4.10 + + Nodes in ``MAINTENANCE`` state are considered available if this option is used on a Slurm reservation with the ``MAINT`` flag set. + .. option:: -P, --parameterize=[TEST.]VAR=VAL0,VAL1,... Parameterize a test on an existing variable or parameter.