Skip to content

Commit ef49be6

Browse files
authored
[ENG-9122] Fix/eng 9122 (#11435)
* Update fix * Add management command to manually reindex preprints
1 parent 7c67707 commit ef49be6

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import logging
2+
from django.core.management.base import BaseCommand
3+
4+
from osf.models import Preprint
5+
6+
logger = logging.getLogger(__name__)
7+
8+
9+
def _first_version_label(preprint):
    """Return the version of the preprint's first versioned GUID, or ``'N/A'``."""
    # One query instead of exists() + first(); also avoids an AttributeError if
    # the row disappears between the two calls.
    first_versioned_guid = preprint.versioned_guids.first()
    return first_versioned_guid.version if first_versioned_guid is not None else 'N/A'


def reindex_versioned_preprints(dry_run=False, batch_size=100, provider_id=None, guids=None):
    """Call ``update_search()`` on every matching published preprint.

    Selection rules:
      * If ``guids`` is given, only preprints whose GUID ``_id`` is in that list
        are selected (the "has versioned GUIDs" filter is NOT applied then).
      * Otherwise every preprint with at least one versioned GUID is selected.
      * ``provider_id``, when truthy, narrows the selection to that provider ``_id``.
      * Only preprints with ``is_published=True`` are kept in all cases.

    Args:
        dry_run: When true, log what would be re-indexed and change nothing.
        batch_size: Passed as ``chunk_size`` to ``QuerySet.iterator()``.
        provider_id: Optional provider ``_id`` to filter on.
        guids: Optional iterable of preprint GUID strings.

    Returns:
        None. Progress, failures and the final tally are reported through the
        module logger; a failure on one preprint does not stop the run.
    """
    if guids:
        preprints = Preprint.objects.filter(guids___id__in=guids)
    else:
        # distinct() because the join on versioned_guids can yield duplicate rows.
        preprints = Preprint.objects.filter(versioned_guids__isnull=False).distinct()

    if provider_id:
        preprints = preprints.filter(provider___id=provider_id)

    preprints = preprints.filter(is_published=True)

    prefix = '[DRY RUN] ' if dry_run else ''
    total_count = preprints.count()
    logger.info(f'{prefix}Found {total_count} versioned preprints to re-index')

    if total_count == 0:
        logger.info('No preprints to re-index')
        return

    processed = 0
    failed = 0
    for processed, preprint in enumerate(preprints.iterator(chunk_size=batch_size), start=1):
        if dry_run:
            logger.info(
                f'[DRY RUN] Would re-index preprint {preprint._id} '
                f'(version {_first_version_label(preprint)}, '
                f'date_created_first_version={preprint.date_created_first_version}) '
                f'[{processed}/{total_count}]'
            )
            continue

        try:
            preprint.update_search()
        except Exception as e:
            # Best effort: record the failure (with traceback) and keep going.
            failed += 1
            logger.exception(f'Failed to re-index preprint {preprint._id}: {e}')
        else:
            # Only emit a progress line every 10th preprint to keep logs short.
            if processed % 10 == 0:
                logger.info(
                    f'Re-indexed preprint {preprint._id} '
                    f'(version {_first_version_label(preprint)}) '
                    f'[{processed}/{total_count}]'
                )

    # Report only the preprints that were actually re-indexed; failures are
    # tallied separately so the summary is not misleading.
    succeeded = processed - failed
    logger.info(
        f'{prefix}Completed. '
        f'{"Would have re-indexed" if dry_run else "Re-indexed"} {succeeded} preprints'
    )
    if failed:
        logger.warning(f'Failed to re-index {failed} of {processed} preprints; see errors above')
54+
55+
56+
class Command(BaseCommand):
    """Management command that forwards its CLI options to ``reindex_versioned_preprints``."""

    help = (
        'Re-index all versioned preprints to Elasticsearch to ensure computed properties '
        'like date_created_first_version are up to date.'
    )

    def add_arguments(self, parser):
        """Register ``--dry-run``, ``--batch-size``, ``--provider`` and ``--guids``."""
        super().add_arguments(parser)

        # (flag, argparse keyword arguments), registered in this order.
        argument_specs = (
            ('--dry-run', {
                'action': 'store_true',
                'dest': 'dry_run',
                'help': 'Preview what would be re-indexed without actually making changes',
            }),
            ('--batch-size', {
                'type': int,
                'default': 100,
                'help': 'Number of preprints to process in each batch (default: 100)',
            }),
            ('--provider', {
                'type': str,
                'help': 'Optional provider ID to filter preprints',
            }),
            ('--guids', {
                'type': str,
                'nargs': '+',
                'help': 'Optional list of specific preprint GUIDs to re-index',
            }),
        )
        for flag, spec in argument_specs:
            parser.add_argument(flag, **spec)

    def handle(self, *args, **options):
        """Log a dry-run banner if requested, then run the re-index with the parsed options."""
        is_dry_run = options.get('dry_run', False)

        if is_dry_run:
            rule = '=' * 60
            for banner_line in (rule, 'DRY RUN MODE - No changes will be made', rule):
                logger.info(banner_line)

        reindex_versioned_preprints(
            dry_run=is_dry_run,
            batch_size=options.get('batch_size', 100),
            provider_id=options.get('provider'),
            guids=options.get('guids'),
        )

0 commit comments

Comments
 (0)