-
Notifications
You must be signed in to change notification settings - Fork 73
[#746] establish default order for replicas listed by an iRODSDataObject
#815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b5bdf84
93dae32
ff2c8df
d229216
06c70d2
4f9e624
a9c4e99
3eafc04
47346b0
fcc215d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,9 @@ | ||
| import ast | ||
| import datetime | ||
| import io | ||
| import sys | ||
| import logging | ||
| import os | ||
| import ast | ||
| import sys | ||
|
|
||
| from irods.models import DataObject | ||
| from irods.meta import iRODSMetaCollection | ||
|
|
@@ -41,11 +42,30 @@ | |
| return "<{}.{} {}>".format(self.__class__.__module__, self.__class__.__name__, self.resource_name) | ||
|
|
||
|
|
||
| _REPL_STATUSES = (1, 0, 2, 3, 4) | ||
| _REFERENCE_DATETIME = datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we use this instead to get this value? `datetime.fromtimestamp(0, timezone.utc)` (see https://docs.python.org/3/library/datetime.html#datetime.datetime.fromtimestamp) |
||
|
|
||
|
|
||
| def _REPLICA_NUMBER_SORT_KEY_FN(row): | ||
| return row[DataObject.replica_number] | ||
|
|
||
| def _REPLICA_FITNESS_SORT_KEY_FN(row): | ||
|
Check failure on line 52 in irods/data_object.py
|
||
| repl_status = int(row[DataObject.replica_status]) | ||
|
|
||
| repl_status_rank = _REPL_STATUSES.index(repl_status) if _REPL_STATUSES.count(repl_status) else sys.maxsize | ||
|
|
||
| return (repl_status_rank, _REFERENCE_DATETIME - row[DataObject.modify_time]) | ||
|
|
||
|
|
||
| _DEFAULT_SORT_KEY_FN = _REPLICA_NUMBER_SORT_KEY_FN | ||
|
|
||
|
|
||
| class iRODSDataObject: | ||
| def __init__(self, manager, parent=None, results=None): | ||
| def __init__(self, manager, parent=None, results=None, replica_sort_function=None): | ||
| self.manager = manager | ||
| if parent and results: | ||
| self.collection = parent | ||
| results = sorted(results, key=(replica_sort_function or _DEFAULT_SORT_KEY_FN)) | ||
| for attr, value in DataObject.__dict__.items(): | ||
| if not attr.startswith("_"): | ||
| try: | ||
|
|
@@ -54,9 +74,8 @@ | |
| # backward compatibility with older schema versions | ||
| pass | ||
| self.path = self.collection.path + "/" + self.name | ||
| replicas = sorted(results, key=lambda r: r[DataObject.replica_number]) | ||
|
|
||
| # The status quo before iRODS 5 | ||
| # Copy pre-iRODS 5 fields | ||
|
|
||
| replica_args = [ | ||
| ( | ||
|
|
@@ -75,13 +94,13 @@ | |
| modify_time=r[DataObject.modify_time], | ||
| ), | ||
| ) | ||
| for r in replicas | ||
| for r in results | ||
| ] | ||
|
|
||
| # Adjust for adding access_time in the iRODS 5 case. | ||
|
|
||
| if self.manager.sess.server_version >= (5,): | ||
| for n, r in enumerate(replicas): | ||
| for n, r in enumerate(results): | ||
| replica_args[n][1]['access_time'] = r[DataObject.access_time] | ||
| self.replicas = [iRODSReplica(*a, **k) for a, k in replica_args] | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,26 +49,26 @@ | |
| return localhost_with_optional_domain_pattern.match(name.lower()) or is_localhost_ip(name) | ||
|
|
||
|
|
||
| from irods.access import iRODSAccess | ||
| from irods.models import Collection, DataObject | ||
| from irods.path import iRODSPath | ||
| from irods.test.helpers import iRODSUserLogins | ||
| import irods.exception as ex | ||
| from irods.column import Criterion | ||
| from irods.data_object import chunks, irods_dirname | ||
| from irods.data_object import chunks, irods_dirname, _REPLICA_FITNESS_SORT_KEY_FN | ||
| import irods.test.helpers as helpers | ||
| import irods.test.modules as test_modules | ||
| import irods.keywords as kw | ||
| import irods.client_configuration as config | ||
| from irods.manager import data_object_manager | ||
| from irods.message import RErrorStack | ||
| from irods.message import ET, XML_Parser_Type, default_XML_parser, current_XML_parser | ||
| from datetime import datetime, timezone, timedelta | ||
| from tempfile import NamedTemporaryFile, gettempdir, mktemp | ||
| from irods.test.helpers import unique_name, my_function_name | ||
| from irods.ticket import Ticket | ||
| import irods.parallel | ||
| from irods.manager.data_object_manager import Server_Checksum_Warning | ||
|
|
||
| RODSUSER = "nonadmin" | ||
|
|
||
|
|
@@ -1253,8 +1253,7 @@ | |
|
|
||
| # assertions on replicas | ||
| self.assertEqual(len(obj.replicas), number_of_replicas) | ||
| for i, replica in enumerate(obj.replicas): | ||
| self.assertEqual(replica.number, i) | ||
| self.assertEqual({repl.number for repl in obj.replicas}, {*range(len(obj.replicas))}) | ||
|
|
||
| # now trim odd-numbered replicas | ||
| # note (see irods/irods#4861): COPIES_KW might disappear in the future | ||
|
|
@@ -1267,10 +1266,7 @@ | |
| obj = session.data_objects.get(obj_path) | ||
|
|
||
| # check remaining replica numbers | ||
| replica_numbers = [] | ||
| for replica in obj.replicas: | ||
| replica_numbers.append(replica.number) | ||
| self.assertEqual(replica_numbers, [0, 2, 4, 6]) | ||
| self.assertEqual({r.number for r in obj.replicas}, {0, 2, 4, 6}) | ||
|
|
||
| # remove object | ||
| obj.unlink(force=True) | ||
|
|
@@ -1728,11 +1724,12 @@ | |
| self.assertIsNotNone(obj.replicas[1].__getattribute__(i)) | ||
|
|
||
| # ensure replica info is sensible | ||
| replicas = sorted(obj.replicas, key=lambda repl: repl.number) | ||
| for i in range(2): | ||
| self.assertEqual(obj.replicas[i].number, i) | ||
| self.assertEqual(obj.replicas[i].status, "1") | ||
| self.assertEqual(obj.replicas[i].path.split("/")[-1], filename) | ||
| self.assertEqual(obj.replicas[i].resc_hier.split(";")[-1], ufs_resources[i].name) | ||
| self.assertEqual(replicas[i].number, i) | ||
| self.assertEqual(replicas[i].status, "1") | ||
| self.assertEqual(replicas[i].path.split("/")[-1], filename) | ||
| self.assertEqual(replicas[i].resc_hier.split(";")[-1], ufs_resources[i].name) | ||
|
|
||
| self.assertEqual(obj.replicas[0].resource_name, ufs_resources[0].name) | ||
| if self.sess.server_version < (4, 2, 0): | ||
|
|
@@ -2992,6 +2989,49 @@ | |
|
|
||
| test_put__issue_722(self) | ||
|
|
||
| def test_modified_sorting_of_replicas__issue_746(self): | ||
| basename = unique_name(my_function_name(), datetime.now()) + '_dataobj_647' | ||
| with self.create_simple_resc() as newResc1, self.create_simple_resc() as newResc2: | ||
| data = helpers.make_object(self.sess, f'{helpers.home_collection(self.sess)}/{basename}') | ||
|
|
||
| # Precondition for an eventual total of 3 replicas: initial data replica is not on either of the new resources. | ||
| self.assertFalse({repl.resource_name for repl in data.replicas} & {newResc1, newResc2}) | ||
| try: | ||
| data.replicate(resource=newResc1) | ||
|
|
||
| # Ensure that one of the replicas is stale, to test proper sorting. | ||
| with data.open('a', **{kw.RESC_NAME_KW: newResc1}) as f: | ||
| f.write(b'.') | ||
| time.sleep(2) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's put a comment explaining that this sleep is to ensure the replicas on |
||
|
|
||
| data.replicate(resource=newResc2) | ||
|
|
||
| # At this point, there should be exactly two good replicas of the three. | ||
| # Assert exactly one replica is stale, to corroborate | ||
| data = self.sess.data_objects.get( | ||
| data.path, | ||
| replica_sort_function=lambda row: int(row[DataObject.replica_status]) | ||
| ) | ||
| self.assertEqual( | ||
| [repl.status for repl in data.replicas], | ||
| ['0', '1', '1'] | ||
| ) | ||
|
|
||
| options = {} | ||
| if irods.version.version_as_tuple() < (4,): | ||
| options['replica_sort_function'] = _REPLICA_FITNESS_SORT_KEY_FN | ||
|
|
||
| # Get a data object with the PRC3-alternative/PRC4-default sort order. | ||
| data = self.sess.data_objects.get(data.path, **options) | ||
|
|
||
| # Test default replica sorting. | ||
| self.assertEqual(data.replicas[0].status, '1') | ||
| self.assertEqual(data.replicas[0].modify_time, data.modify_time) | ||
| self.assertGreater(data.replicas[0].modify_time, data.replicas[1].modify_time) | ||
| finally: | ||
| if data: | ||
| data.unlink(force=True) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| # let the tests find the parent irods lib | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a possible future improvement, it may be beneficial to represent replica statuses as a proper enumeration with names and all that.