From 13ad8762838aab2a35c803135453fb3b1bdbe005 Mon Sep 17 00:00:00 2001 From: antkryt Date: Thu, 18 Jun 2026 16:46:45 +0300 Subject: [PATCH 1/3] [ENG-11342] BE: Add minimal osfstorage_get_children for DAZ (#11776) --- addons/osfstorage/views.py | 74 ++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/addons/osfstorage/views.py b/addons/osfstorage/views.py index e387f34a768..63110ea313a 100644 --- a/addons/osfstorage/views.py +++ b/addons/osfstorage/views.py @@ -2,16 +2,19 @@ import logging from django.core.exceptions import ValidationError +from django.contrib.contenttypes.models import ContentType from django.db import IntegrityError from django.db import connection from django.db import transaction - +from django.db.models import Case, CharField, F, Value, When +from django.db.models.functions import Concat from flask import request from framework.auth import Auth from framework.exceptions import HTTPError from framework.auth.decorators import must_be_signed, must_be_logged_in +from api.base.utils import is_truthy from osf.exceptions import InvalidTagError, TagNotFoundError from osf.models import FileVersion, Node, OSFUser from osf.utils.permissions import WRITE @@ -25,7 +28,7 @@ from website.settings import StorageLimits from addons.osfstorage import utils from addons.osfstorage import decorators -from addons.osfstorage.models import OsfStorageFolder +from addons.osfstorage.models import OsfStorageFileNode, OsfStorageFolder from addons.osfstorage import settings as osf_storage_settings @@ -183,12 +186,59 @@ def osfstorage_get_metadata(file_node, **kwargs): return file_node.serialize(version=version, include_full=True) -@must_be_signed -@decorators.autoload_filenode(must_be='folder') -def osfstorage_get_children(file_node, **kwargs): - from django.contrib.contenttypes.models import ContentType +# TODO: Remove _osfstorage_minimal_metadata_sql if the Django ORM implementation +# performs well in production benchmarks and zip/DAZ workloads. +def _osfstorage_minimal_metadata_sql(file_node): + with connection.cursor() as cursor: + cursor.execute(""" + SELECT json_agg( + CASE + WHEN F.type = 'osf.osfstoragefile' THEN + json_build_object( + 'kind', 'file', + 'name', F.name, + 'path', '/' || F._id + ) + ELSE + json_build_object( + 'kind', 'folder', + 'name', F.name, + 'path', '/' || F._id || '/' + ) + END + ) + FROM osf_basefilenode AS F + WHERE F.parent_id = %s + AND F.type IN ('osf.osfstoragefile', 'osf.osfstoragefolder') + """, [file_node.id]) + return cursor.fetchone()[0] or [] + + +def _osfstorage_minimal_metadata_orm(file_node): + return list( + OsfStorageFileNode.objects + .filter( + parent_id=file_node.id, + type__in=('osf.osfstoragefile', 'osf.osfstoragefolder'), + ) + .annotate( + kind=Case( + When(type='osf.osfstoragefile', then=Value('file')), + default=Value('folder'), + output_field=CharField(), + ), + path=Case( + When(type='osf.osfstoragefile', then=Concat(Value('/'), F('_id'))), + default=Concat(Value('/'), F('_id'), Value('/')), + output_field=CharField(), + ), + ) + .values('kind', 'name', 'path') + ) + + +def _osfstorage_full_metadata(file_node, user_id): from osf.models.preprint import Preprint - user_id = request.args.get('user_id') user_content_type_id = ContentType.objects.get_for_model(OSFUser).id user_pk = OSFUser.objects.filter(guids___id=user_id, guids___id__isnull=False).values_list('pk', flat=True).first() guid_id = file_node.target.get_guid().id if isinstance(file_node.target, Preprint) else file_node.target.guids.first().id, @@ -294,6 +344,16 @@ def osfstorage_get_children(file_node, **kwargs): return cursor.fetchone()[0] or [] +@must_be_signed +@decorators.autoload_filenode(must_be='folder') +def osfstorage_get_children(file_node, **kwargs): + if is_truthy(request.args.get('minimal')): + if is_truthy(request.args.get('orm')): + return _osfstorage_minimal_metadata_orm(file_node) + return _osfstorage_minimal_metadata_sql(file_node) + return _osfstorage_full_metadata(file_node, request.args.get('user_id')) + + @must_be_signed @decorators.autoload_filenode(must_be='folder') def osfstorage_create_child(file_node, payload, **kwargs): From 68737e7fdaad84eb9d82c509896bce2a6c26c83d Mon Sep 17 00:00:00 2001 From: antkryt Date: Fri, 19 Jun 2026 18:09:54 +0300 Subject: [PATCH 2/3] [ENG-11467] Add pagination for `osfstorage_get_children` Django ORM implementation (#11779) --- addons/osfstorage/views.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/addons/osfstorage/views.py b/addons/osfstorage/views.py index 63110ea313a..b3cb52edb48 100644 --- a/addons/osfstorage/views.py +++ b/addons/osfstorage/views.py @@ -214,13 +214,16 @@ def _osfstorage_minimal_metadata_sql(file_node): return cursor.fetchone()[0] or [] -def _osfstorage_minimal_metadata_orm(file_node): - return list( - OsfStorageFileNode.objects - .filter( - parent_id=file_node.id, - type__in=('osf.osfstoragefile', 'osf.osfstoragefolder'), - ) +def _osfstorage_minimal_metadata_orm(file_node, *, limit=None, after=None): + qs = OsfStorageFileNode.objects.filter( + parent_id=file_node.id, + type__in=('osf.osfstoragefile', 'osf.osfstoragefolder'), + ) + if after is not None: + qs = qs.filter(id__gt=after) + + qs = ( + qs .annotate( kind=Case( When(type='osf.osfstoragefile', then=Value('file')), @@ -233,9 +236,15 @@ def _osfstorage_minimal_metadata_orm(file_node): output_field=CharField(), ), ) - .values('kind', 'name', 'path') + .order_by('id') + .values('id', 'kind', 'name', 'path') ) + if limit is not None: + qs = qs[:limit] + + return list(qs) + def _osfstorage_full_metadata(file_node, user_id): from osf.models.preprint import Preprint @@ -349,7 +358,11 @@ def _osfstorage_full_metadata(file_node, user_id): def osfstorage_get_children(file_node, **kwargs): if is_truthy(request.args.get('minimal')): if is_truthy(request.args.get('orm')): - return _osfstorage_minimal_metadata_orm(file_node) + return _osfstorage_minimal_metadata_orm( + file_node, + limit=request.args.get('limit', type=int, default=None), + after=request.args.get('after', type=int, default=None), + ) return _osfstorage_minimal_metadata_sql(file_node) return _osfstorage_full_metadata(file_node, request.args.get('user_id')) From 7c324014e85de32db187b651b57388760be73b95 Mon Sep 17 00:00:00 2001 From: antkryt Date: Mon, 22 Jun 2026 18:07:41 +0300 Subject: [PATCH 3/3] [ENG-11353] 1.2.4 - BE: Unit tests (#11780) * add unit tests for minimal osfstorage_get_children * minor test fix --- addons/osfstorage/tests/test_views.py | 204 ++++++++++++++++++-------- 1 file changed, 142 insertions(+), 62 deletions(-) diff --git a/addons/osfstorage/tests/test_views.py b/addons/osfstorage/tests/test_views.py index 825b68b2671..1a3513f5a80 100644 --- a/addons/osfstorage/tests/test_views.py +++ b/addons/osfstorage/tests/test_views.py @@ -67,16 +67,6 @@ def send_hook(self, view_name, view_kwargs, payload, target, method='get', **kwa @pytest.mark.django_db class TestGetMetadataHook(HookTestCase): - def test_empty(self): - res = self.send_hook( - 'osfstorage_get_children', - {'fid': self.node_settings.get_root()._id, 'user_id': self.user._id}, - {}, - self.node - ) - assert isinstance(res.json, list) - assert res.json == [] - def test_file_metadata(self): path = 'kind/of/magíc.mp3' record = recursively_create_file(self.node_settings, path) @@ -107,6 +97,97 @@ def test_preprint_primary_file_metadata(self): assert isinstance(res.json, dict) assert res.json == record.parent.serialize(True) + def test_osf_storage_root(self): + auth = Auth(self.project.creator) + result = osf_storage_root(self.node_settings.config, self.node_settings, auth) + node = self.project + expected = rubeus.build_addon_root( + node_settings=self.node_settings, + name='', + permissions=auth, + user=auth.user, + nodeUrl=node.url, + nodeApiUrl=node.api_url, + ) + root = result[0] + assert root == expected + + def test_root_default(self): + res = self.send_hook('osfstorage_get_metadata', {}, {}, self.node) + + assert res.json['fullPath'] == '/' + assert res.json['id'] == self.node_settings.get_root()._id + + def test_root_preprint_default(self): + preprint = PreprintFactory() + res = self.send_hook('osfstorage_get_metadata', {}, {}, preprint) + + assert res.json['fullPath'] == '/' + assert res.json['id'] == preprint.root_folder._id + + def test_metadata_not_found(self): + res = self.send_hook( + 'osfstorage_get_metadata', + {'fid': 'somebogusid'}, {}, + self.node, + ) + assert res.status_code == 404 + + def test_metadata_not_found_lots_of_slashes(self): + res = self.send_hook( + 'osfstorage_get_metadata', + {'fid': '/not/fo/u/nd/'}, {}, + self.node, + ) + assert res.status_code == 302 + assert '/login?service=' in res.location + + self.node.is_public = True + self.node.save() + res = self.send_hook( + 'osfstorage_get_metadata', + {'fid': '/not/fo/u/nd/'}, {}, + self.node, + ) + assert res.status_code == 404 + + +@pytest.mark.django_db +class TestGetChildrenHook(HookTestCase): + + def get_children(self, parent, target=None, *, orm=False, **query_params): + params = { + 'fid': parent._id, + 'user_id': self.user._id, + 'minimal': 'true', + **query_params, + } + if orm: + params['orm'] = 'true' + return self.send_hook( + 'osfstorage_get_children', + params, + {}, + target or self.node, + ) + + def _create_child_file(self, parent, name): + record = parent.append_file(name) + version = factories.FileVersionFactory() + record.add_version(version) + record.save() + return record + + def test_empty(self): + res = self.send_hook( + 'osfstorage_get_children', + {'fid': self.node_settings.get_root()._id, 'user_id': self.user._id}, + {}, + self.node + ) + assert isinstance(res.json, list) + assert res.json == [] + def test_children_metadata(self): path = 'kind/of/magíc.mp3' record = recursively_create_file(self.node_settings, path) @@ -176,59 +257,58 @@ def test_children_metadata_preprint(self): assert res_date_modified == expected_date_modified assert res_date_created == expected_date_created - def test_osf_storage_root(self): - auth = Auth(self.project.creator) - result = osf_storage_root(self.node_settings.config, self.node_settings, auth) - node = self.project - expected = rubeus.build_addon_root( - node_settings=self.node_settings, - name='', - permissions=auth, - user=auth.user, - nodeUrl=node.url, - nodeApiUrl=node.api_url, - ) - root = result[0] - assert root == expected - - def test_root_default(self): - res = self.send_hook('osfstorage_get_metadata', {}, {}, self.node) - - assert res.json['fullPath'] == '/' - assert res.json['id'] == self.node_settings.get_root()._id - - def test_root_preprint_default(self): - preprint = PreprintFactory() - res = self.send_hook('osfstorage_get_metadata', {}, {}, preprint) - - assert res.json['fullPath'] == '/' - assert res.json['id'] == preprint.root_folder._id - - def test_metadata_not_found(self): - res = self.send_hook( - 'osfstorage_get_metadata', - {'fid': 'somebogusid'}, {}, - self.node, - ) - assert res.status_code == 404 - - def test_metadata_not_found_lots_of_slashes(self): - res = self.send_hook( - 'osfstorage_get_metadata', - {'fid': '/not/fo/u/nd/'}, {}, - self.node, - ) - assert res.status_code == 302 - assert '/login?service=' in res.location - - self.node.is_public = True - self.node.save() - res = self.send_hook( - 'osfstorage_get_metadata', - {'fid': '/not/fo/u/nd/'}, {}, - self.node, + def test_minimal_child_fields(self): + for orm in (False, True): + with self.subTest(orm='orm' if orm else 'sql'): + parent = self.node_settings.get_root().append_folder( + f'minimal-child-fields-{"orm" if orm else "sql"}' + ) + record = self._create_child_file(parent, 'magíc.mp3') + folder = parent.append_folder('nested') + res = self.get_children(parent, orm=orm) + assert res.status_code == 200 + assert len(res.json) == 2 + assert {item['name'] for item in res.json} == {record.name, folder.name} + expected_keys = {'kind', 'name', 'path'} + if orm: + expected_keys.add('id') + for item in res.json: + assert set(item.keys()) == expected_keys + if item['kind'] == 'file': + assert item['path'] == f'/{record._id}' + if orm: + assert item['id'] == record.id + else: + assert item['path'] == f'/{folder._id}/' + if orm: + assert item['id'] == folder.id + + def test_minimal_pagination(self): + parent = self.node_settings.get_root().append_folder('pagination') + children = [ + self._create_child_file(parent, name) + for name in ('a.mp3', 'b.mp3', 'c.mp3') + ] + children.sort(key=lambda c: c.id) + first_page = self.get_children(parent, orm=True, limit=2) + assert first_page.status_code == 200 + assert [item['id'] for item in first_page.json] == [children[0].id, children[1].id] + second_page = self.get_children( + parent, + orm=True, + limit='2', + after=str(children[1].id), ) - assert res.status_code == 404 + assert second_page.status_code == 200 + assert len(second_page.json) == 1 + assert second_page.json[0]['id'] == children[2].id + + def test_minimal_pagination_after_beyond_last_child(self): + parent = self.node_settings.get_root() + record = self._create_child_file(parent, 'only.mp3') + res = self.get_children(parent, orm=True, after=str(record.id)) + assert res.status_code == 200 + assert res.json == [] @pytest.mark.django_db