Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions admin/base/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
re_path(r'^cedar_metadata_templates/', include('admin.cedar.urls', namespace='cedar_metadata_templates')),
re_path(r'^draft_registrations/', include('admin.draft_registrations.urls', namespace='draft_registrations')),
re_path(r'^files/', include('admin.files.urls', namespace='files')),
re_path(r'^share_reindex/', include('admin.share_reindex.urls', namespace='share_reindex')),
]),
),
]
Expand Down
Empty file added admin/share_reindex/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions admin/share_reindex/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from django.urls import re_path
from . import views

app_name = 'admin'

urlpatterns = [
    # Index page: paginated list rendered by FailedShareIndexedGuidList.
    re_path(
        r'^$',
        views.FailedShareIndexedGuidList.as_view(),
        name='list',
    ),
    # Bulk reindex endpoint; the single path segment is captured as `resource_type`.
    re_path(
        r'^(?P<resource_type>[^/]+)/$',
        views.FailedShareIndexedGuidReindex.as_view(),
        name='reindex-share-resource',
    ),
]
52 changes: 52 additions & 0 deletions admin/share_reindex/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from django.contrib.auth.mixins import PermissionRequiredMixin
from django.urls import reverse
from django.shortcuts import redirect
from django.views.generic import ListView, View
from osf.models import Guid
from urllib.parse import urlencode
from api.share.utils import get_not_indexed_guids_for_resource_with_no_indexed_guid, task__reindex_failed_or_not_indexed_resource_into_share

class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
    """Paginated admin list of resources that are not marked as indexed.

    The resource type is chosen with the ``type`` query parameter and
    defaults to ``projects``. Unsupported values fall back to the default so
    that the queryset helper and the template's ``{% url %}`` lookups always
    receive a type they know about.
    """
    paginate_by = 25
    template_name = 'share_reindex/list.html'
    permission_required = 'osf.update_share_reindex'
    raise_exception = True
    model = Guid

    DEFAULT_RESOURCE_TYPE = 'projects'
    # URL name of the admin detail page for each selectable resource type.
    RESOURCE_DETAIL_URL_NAMES = {
        'users': 'users:user',
        'preprints': 'preprints:preprint',
        'registries': 'nodes:node',
        'projects': 'nodes:node',
        'files': 'files:file',
    }
    # URL name of the per-resource reindex action; files have none, and the
    # template hides the reindex column for them.
    RESOURCE_REINDEX_URL_NAMES = {
        'users': 'users:reindex-share-user',
        'preprints': 'preprints:reindex-share-preprint',
        'registries': 'nodes:reindex-share-node',
        'projects': 'nodes:reindex-share-node',
    }

    def _get_resource_type(self):
        """Return the requested resource type, or the default if it is unsupported."""
        resource_type = self.request.GET.get('type', self.DEFAULT_RESOURCE_TYPE)
        if resource_type not in self.RESOURCE_DETAIL_URL_NAMES:
            return self.DEFAULT_RESOURCE_TYPE
        return resource_type

    def get_queryset(self):
        """Return the not-indexed resources of the selected type."""
        return get_not_indexed_guids_for_resource_with_no_indexed_guid(self._get_resource_type())

    def get_context_data(self, **kwargs):
        """Build the template context.

        Pagination is left to the parent class (it already runs because
        ``paginate_by`` is set); its ``object_list`` and ``page_obj`` are
        re-exposed under the names the template expects, which avoids
        paginating the queryset a second time.
        """
        context = super().get_context_data(**kwargs)
        resource_type = self._get_resource_type()
        context.setdefault('items_to_index', context['object_list'])
        context.setdefault('page', context['page_obj'])
        context.setdefault('selected_resource_type', resource_type)
        context.setdefault('resource_detail', self.RESOURCE_DETAIL_URL_NAMES.get(resource_type))
        context.setdefault('resource_guid_reindex', self.RESOURCE_REINDEX_URL_NAMES.get(resource_type))
        # The bulk-reindex view redirects back here with ``status=indexing``.
        if self.request.GET.get('status') == 'indexing':
            status_msg = f'Reindex of {resource_type} started, please check later.'
        else:
            status_msg = ''
        context.setdefault('share_reindex_message', status_msg)
        return context


class FailedShareIndexedGuidReindex(PermissionRequiredMixin, View):
    """Start a background reindex for every not-indexed resource of one type.

    Accepts POST only. The resource type comes from the URL; unsupported
    values return 404 instead of enqueueing a task for a type that cannot be
    handled.
    """
    permission_required = 'osf.update_share_reindex'
    raise_exception = True

    # Resource types offered by the list page's type selector.
    SUPPORTED_RESOURCE_TYPES = frozenset({'projects', 'preprints', 'registries', 'users', 'files'})

    def post(self, request, *args, **kwargs):
        """Enqueue the reindex task and redirect back to the list page."""
        from django.http import Http404

        resource_type = self.kwargs.get('resource_type')
        if resource_type not in self.SUPPORTED_RESOURCE_TYPES:
            raise Http404(f'Unknown resource type: {resource_type}')
        # The task runs in the background with its default batch limits;
        # the original note says this covers up to 100_000 guids.
        task__reindex_failed_or_not_indexed_resource_into_share.delay(resource_type)
        list_url = reverse('share_reindex:list')
        # ``status=indexing`` makes the list page show its "started" message.
        query_string = urlencode({'type': resource_type, 'status': 'indexing'})
        return redirect(f"{list_url}?{query_string}")
3 changes: 3 additions & 0 deletions admin/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@
{% if perms.osf.change_cedarmetadatatemplate %}
<li><a href="{% url 'cedar_metadata_templates:list' %}"><i class='fa fa-link'></i> <span>Cedar Metadata Templates</span></a></li>
{% endif %}
{% if perms.osf.update_share_reindex %}
<li><a href="{% url 'share_reindex:list' %}"><i class='fa fa-link'></i> <span>Share Reindex</span></a></li>
{% endif %}
{% if perms.osf.change_maintenancestate %}
<li><a href="{% url 'maintenance:display' %}"><i class='fa fa-link'></i> <span>Maintenance Alerts</span></a></li>
{% endif %}
Expand Down
133 changes: 133 additions & 0 deletions admin/templates/share_reindex/list.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{% extends "base.html" %}
{% load render_bundle from webpack_loader %}
{% load comment_extras %}

{% load static %}
{% block top_includes %}
<link rel="stylesheet" type="text/css" href="/static/css/institutions.css" />
{% endblock %}
{% block title %}
<title>Share Reindex</title>
{% endblock title %}
{% block content %}
<h2>Share Reindex</h2>

{% include "util/pagination.html" with items=page extra_query_params="&type="|add:selected_resource_type %}


<div class="row" style="margin-bottom: 20px;">
<div class="col-md-3">
<form method="GET" action="">
<select class="form-control" name="type" onchange="this.form.submit()">
<option value="projects" {% if selected_resource_type == 'projects' %}selected{% endif %}>Projects</option>
<option value="preprints" {% if selected_resource_type == 'preprints' %}selected{% endif %}>Preprints</option>
<option value="registries" {% if selected_resource_type == 'registries' %}selected{% endif %}>Registries</option>
<option value="users" {% if selected_resource_type == 'users' %}selected{% endif %}>Users</option>
<option value="files" {% if selected_resource_type == 'files' %}selected{% endif %}>Files</option>
</select>

</form>
</div>
<div class="col-md-3">
<a data-toggle="modal" data-target="#confirmReindexShareNodes" class="btn btn-primary">
SHARE Reindex All {{selected_resource_type}}
</a>

<div class="modal" id="confirmReindexShareNodes">
<div class="modal-dialog">
<div class="modal-content">

<form method="post" action="{% url 'admin:reindex-share-resource' resource_type=selected_resource_type %}">
{% csrf_token %}
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal">×</button>
<h3>Are you sure you want to reindex {{selected_resource_type}} (SHARE)?</h3>
</div>

<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Cancel</button>
<input class="btn btn-primary" type="submit" value="Confirm Re-index" />
</div>
</form>

</div>
</div>
</div>
</div>
</div>

<div>
<p>{{share_reindex_message}}</p>
</div>


<table class="table table-striped table-hover table-responsive">
<thead>
<tr>
<th>Guid</th>
{% if selected_resource_type == 'projects' or selected_resource_type == 'preprints' or selected_resource_type == 'registries' %}
<th>Title</th>
{% elif selected_resource_type == 'users' %}
<th>Fullname</th>
{% else %}
<th>Name</th>
{% endif %}
<th>Datetime Last Indexed</th>
<!-- The file detail page has no reindex option yet, so the Reindex column is hidden for files. -->
{% if selected_resource_type != 'files' %}
<th>Reindex</th>
{% endif %}
</tr>
</thead>
<tbody>
{% for item in items_to_index %}
<tr>
<td>
<a href="{% url resource_detail guid=item.first_guid %}">
{{item.first_guid}}
</a>
</td>
{% if selected_resource_type == 'projects' or selected_resource_type == 'preprints' or selected_resource_type == 'registries' %}
<td>{{item.title}}</td>
{% elif selected_resource_type == 'users' %}
<td>{{item.fullname}}</td>
{% else %}
<td>{{item.name}}</td>
{% endif %}

<td>{{item.date_last_indexed}}</td>

{% if selected_resource_type != 'files' %}
<td>
<a data-toggle="modal" data-target="#confirmReindexShareNode-{{ item.first_guid }}" class="btn btn-primary">SHARE Reindex</a>
</td>
<div class="modal" id="confirmReindexShareNode-{{ item.first_guid }}">
<div class="modal-dialog">
<div class="modal-content">
<form class="well" method="post" action="{% url resource_guid_reindex guid=item.first_guid %}">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal">x</button>
<h3>Are you sure you want to reindex this node (SHARE)? {{ item.first_guid }}</h3>
</div>
{% csrf_token %}
<div class="modal-footer">
<input class="btn btn-danger" type="submit" value="Confirm" />
<button type="button" class="btn btn-default" data-dismiss="modal">
Cancel
</button>
</div>
</form>

</div>
{# Data from above link #}
</div>
</div>
{% endif %}


</tr>
{% endfor %}
</tbody>
</table>

{% endblock content %}
14 changes: 7 additions & 7 deletions admin/templates/util/pagination.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
<div class="pagination pagination-lg">
<span>
{% if items.has_previous %}
<a href="?page=1&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page=1&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
|
</a>
<a href="?page={{ items.previous_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.previous_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
<i class="fa fa-angle-left"></i>
</a>
Expand All @@ -25,11 +25,11 @@
</span>

{% if items.has_next %}
<a href="?page={{ items.next_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.next_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
<i class="fa fa-angle-right"></i>
</a>
<a href="?page={{ items.paginator.num_pages }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.paginator.num_pages }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
|
</a>
Expand All @@ -44,11 +44,11 @@
</span>
{% if pagin %}
<span>
<a href="?p=10&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=10&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">10</a>
<a href="?p=25&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=25&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">25</a>
<a href="?p=50&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=50&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">50</a>
</span>
{% endif %}
Expand Down
38 changes: 38 additions & 0 deletions api/share/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
import logging

from django.apps import apps
from django.db.models import Q, OuterRef, Subquery
from django.contrib.contenttypes.models import ContentType
from celery.utils.time import get_exponential_backoff_interval
import requests


from framework.celery_tasks import app as celery_app
from framework.celery_tasks.handlers import enqueue_task
from framework.encryption import ensure_bytes
Expand Down Expand Up @@ -80,6 +83,7 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
raise ValueError(f'unknown osfguid "{guid}"')
_resource = _osfid_instance.referent
_is_deletion = _should_delete_indexcard(_resource)
_resource.mark_indexing_failed()
try:
_response = (
pls_delete_trove_record(_resource, osfmap_partition=_osfmap_partition)
Expand Down Expand Up @@ -115,6 +119,7 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
if HTTPStatus(_response.status_code).is_server_error:
raise self.retry(exc=e)
else: # success response
_resource.mark_indexing_success()
if not _is_deletion:
# enqueue followup task for supplementary metadata
_next_partition = _next_osfmap_partition(_osfmap_partition)
Expand All @@ -126,6 +131,39 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
)


@celery_app.task
def task__reindex_failed_or_not_indexed_resource_into_share(resource_type: str, start_id: int = 0, chunk_count: int = 200, chunk_size: int = 500):
    """Recatalog the resources of ``resource_type`` that are not marked as indexed.

    ``start_id``, ``chunk_count`` and ``chunk_size`` are passed through to
    ``recatalog`` unchanged.
    """
    from osf.management.commands.recatalog_metadata import recatalog

    # first_guid=False requests the plain model queryset rather than the
    # values() rows annotated with ``first_guid``.
    not_indexed_resources = get_not_indexed_guids_for_resource_with_no_indexed_guid(
        resource_type,
        first_guid=False,
    )
    # TODO: agree the best chunk count and chunk size with the Cloud Team.
    recatalog(not_indexed_resources, start_id, chunk_count, chunk_size)


def get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type: str, first_guid: bool = True):
    """Return a queryset of resources of ``resource_type`` that are not marked as indexed.

    :param resource_type: one of ``projects``, ``preprints``, ``registries``,
        ``users`` or ``files``; any other value is treated as ``projects``.
    :param first_guid: when true, return ``values()`` rows annotated with
        ``first_guid`` (the ``_id`` of the earliest-created Guid pointing at
        the object) and skip objects that have no Guid; when false, return
        the filtered model queryset itself.
    """
    from osf.models import Guid, Registration, Preprint, Node, OSFUser
    from addons.osfstorage.models import OsfStorageFile

    # "Not indexed" covers both an explicit False and a never-set NULL.
    not_indexed_query = Q(has_been_indexed=False) | Q(has_been_indexed__isnull=True)
    common_not_indexed_public_resource_extract_query = (
        Q(is_public=True) & Q(deleted__isnull=True) & not_indexed_query
    )
    # resource type -> (model, filter, fields returned when first_guid is true)
    resource_mapper = {
        'projects': (Node, common_not_indexed_public_resource_extract_query, ('first_guid', 'date_last_indexed', 'title')),
        'preprints': (Preprint, common_not_indexed_public_resource_extract_query & Q(is_published=True), ('first_guid', 'date_last_indexed', 'title')),
        'registries': (Registration, common_not_indexed_public_resource_extract_query, ('first_guid', 'date_last_indexed', 'title')),
        'users': (OSFUser, Q(is_active=True) & Q(deleted__isnull=True) & not_indexed_query, ('first_guid', 'fullname', 'date_last_indexed')),
        # NOTE(review): unlike the other types, files are not filtered on
        # has_been_indexed -- confirm whether that is intentional.
        'files': (OsfStorageFile, Q(deleted__isnull=True), ('first_guid', 'name', 'date_last_indexed')),
    }
    # Fall back to the 'projects' entry itself; a bare 'projects' string as the
    # default cannot be unpacked into three names and raised ValueError.
    resource_model, query, values_to_return = resource_mapper.get(resource_type, resource_mapper['projects'])
    resources = resource_model.objects.filter(query)
    if not first_guid:
        return resources

    model_content_type = ContentType.objects.get_for_model(resource_model)
    first_guid_sq = Guid.objects.filter(
        content_type=model_content_type,
        object_id=OuterRef('pk'),
    ).order_by('created').values('_id')[:1]
    return (
        resources
        .annotate(first_guid=Subquery(first_guid_sq))
        .exclude(first_guid__isnull=True)
        .values(*values_to_return)
    )


def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition):
try:
_iri = osf_item.get_semantic_iri()
Expand Down
53 changes: 53 additions & 0 deletions osf/migrations/0038_abstractnode_date_last_indexed_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.26 on 2026-03-31 15:44

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add ``date_last_indexed`` and ``has_been_indexed`` to four models."""

    dependencies = [
        ('osf', '0037_notification_refactor_post_release'),
    ]

    # Each model gets the same pair of nullable columns; operations are
    # produced model by model, date_last_indexed before has_been_indexed.
    operations = [
        operation
        for model_name in ('abstractnode', 'basefilenode', 'osfuser', 'preprint')
        for operation in (
            migrations.AddField(
                model_name=model_name,
                name='date_last_indexed',
                field=models.DateTimeField(blank=True, null=True),
            ),
            migrations.AddField(
                model_name=model_name,
                name='has_been_indexed',
                field=models.BooleanField(blank=True, db_index=True, default=None, null=True),
            ),
        )
    ]
4 changes: 2 additions & 2 deletions osf/models/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from framework import sentry
from .base import BaseModel, OptionalGuidMixin, ObjectIDMixin
from .comment import CommentableMixin
from .mixins import Taggable
from .mixins import Taggable, ShareIndexMixin
from .validators import validate_location
from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField
from osf.utils.fields import NonNaiveDateTimeField
Expand Down Expand Up @@ -64,7 +64,7 @@ class UnableToResolveFileClass(Exception):
pass


class BaseFileNode(TypedModel, CommentableMixin, OptionalGuidMixin, Taggable, ObjectIDMixin, BaseModel):
class BaseFileNode(TypedModel, CommentableMixin, OptionalGuidMixin, Taggable, ObjectIDMixin, ShareIndexMixin, BaseModel):
"""Base class for all provider-specific file models and the trashed file model.
This class should generally not be used or created manually. Use the provider-specific
subclasses instead.
Expand Down
Loading
Loading