@@ -1,6 +1,6 @@
 package org.jabref.logic.importer.fetcher;

-import java.io.IOException;
+import java.io.InputStream;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -32,6 +32,7 @@
 import org.jabref.model.entry.field.StandardField;
 import org.jabref.model.entry.identifier.DOI;
 import org.jabref.model.entry.types.StandardEntryType;
+import org.jabref.model.util.DummyFileUpdateMonitor;
 import org.jabref.model.util.OptionalUtil;

 import com.google.common.util.concurrent.RateLimiter;
@@ -64,6 +65,10 @@ public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {
      */
     private static final RateLimiter CROSSREF_DCN_RATE_LIMITER = RateLimiter.create(50.0);

+    private static final FieldFormatterCleanup NORMALIZE_PAGES = new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter());
+    private static final FieldFormatterCleanup CLEAR_URL = new FieldFormatterCleanup(StandardField.URL, new ClearFormatter());
+    private static final FieldFormatterCleanup HTML_TO_LATEX_TITLE = new FieldFormatterCleanup(StandardField.TITLE, new HtmlToLatexFormatter());
+
     private final ImportFormatPreferences preferences;

     public DoiFetcher(ImportFormatPreferences preferences) {
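
For readers unfamiliar with Guava's RateLimiter, the standalone sketch below (not part of the commit; the class name and the 25.0 figure are illustrative, only RateLimiter.create(50.0) comes from the diff) shows how a shared limiter like CROSSREF_DCN_RATE_LIMITER paces callers and can be re-tuned at runtime, which is presumably what updateCrossrefAPIRate(...) does further down.

    import com.google.common.util.concurrent.RateLimiter;

    // Illustrative sketch only: a shared limiter analogous to CROSSREF_DCN_RATE_LIMITER above.
    class RateLimiterSketch {
        private static final RateLimiter LIMITER = RateLimiter.create(50.0); // 50 permits per second

        public static void main(String[] args) {
            for (int i = 0; i < 3; i++) {
                double waitedSeconds = LIMITER.acquire(); // blocks until a permit is available
                System.out.printf("request %d sent after waiting %.3f s%n", i, waitedSeconds);
            }
            LIMITER.setRate(25.0); // the rate can be adjusted later, e.g. from server-provided headers
        }
    }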
@@ -116,75 +121,73 @@ protected CompletableFuture<Optional<BibEntry>> asyncPerformSearchById(String id

     @Override
     public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
-        Optional<DOI> doi = DOI.parse(identifier);
-
-        if (doi.isEmpty()) {
-            throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
-        }
+        DOI doi = DOI.parse(identifier)
+                     .orElseThrow(() -> new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier)));

         URL doiURL;
         try {
-            doiURL = URLUtil.create(doi.get().getURIAsASCIIString());
+            doiURL = URLUtil.create(doi.getURIAsASCIIString());
         } catch (MalformedURLException e) {
             throw new FetcherException("Malformed URL", e);
         }

-        try {
-            Optional<BibEntry> fetchedEntry;
+        Optional<BibEntry> fetchedEntry;

-            // mEDRA does not return a parsable bibtex string
-            Optional<String> agency = getAgency(doi.get());
-            if (agency.isPresent() && "medra".equalsIgnoreCase(agency.get())) {
-                return new Medra().performSearchById(identifier);
-            }
-
-            // BibTeX data
-            URLDownload download = getUrlDownload(doiURL);
-            download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
+        // mEDRA does not return a parsable bibtex string
+        Optional<String> agency;
+        try {
+            agency = getAgency(doi);
+        } catch (MalformedURLException e) {
+            throw new FetcherException("Invalid URL", e);
+        }
+        if (agency.isPresent() && "medra".equalsIgnoreCase(agency.get())) {
+            return new Medra().performSearchById(identifier);
+        }

-            String bibtexString;
-            URLConnection openConnection;
+        URLDownload download = getUrlDownload(doiURL);
+        download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
+        HttpURLConnection connection = (HttpURLConnection) download.openConnection();
+        InputStream inputStream = download.asInputStream(connection);

-            openConnection = download.openConnection();
-            bibtexString = URLDownload.asString(openConnection).trim();
+        BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
+        try {
+            fetchedEntry = bibtexParser.parseEntries(inputStream).stream().findFirst();
+        } catch (ParseException e) {
+            throw new FetcherException(doiURL, "Could not parse BibTeX entry", e);
+        }
+        // Crossref has a dynamic API rate limit
+        if (agency.isPresent() && "crossref".equalsIgnoreCase(agency.get())) {
+            updateCrossrefAPIRate(connection);
+        }
+        connection.disconnect();

-            // BibTeX entry
-            fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences);
-            fetchedEntry.ifPresent(this::doPostCleanup);
+        fetchedEntry.ifPresent(entry -> {
+            doPostCleanup(entry);

-            // Crossref has a dynamic API rate limit
-            if (agency.isPresent() && "crossref".equalsIgnoreCase(agency.get())) {
-                updateCrossrefAPIRate(openConnection);
+            // Output warnings in case of inconsistencies
+            entry.getField(StandardField.DOI)
+                 .filter(entryDoi -> !entryDoi.equals(doi.asString()))
+                 .ifPresent(entryDoi -> LOGGER.warn("Fetched entry's DOI {} is different from requested DOI {}", entryDoi, identifier));
+            if (entry.getField(StandardField.DOI).isEmpty()) {
+                LOGGER.warn("Fetched entry does not contain a DOI field: {}", identifier);
             }

-            // Check if the entry is an APS journal and add the article id as the page count if page field is missing
-            if (fetchedEntry.isPresent() && fetchedEntry.get().hasField(StandardField.DOI)) {
-                BibEntry entry = fetchedEntry.get();
-                if (isAPSJournal(entry, entry.getField(StandardField.DOI).get()) && !entry.hasField(StandardField.PAGES)) {
-                    setPageCountToArticleId(entry, entry.getField(StandardField.DOI).get());
-                }
+            if (isAPSJournal(entry, doi) && !entry.hasField(StandardField.PAGES)) {
+                setPageNumbersBasedOnDoi(entry, doi);
             }
+        });

-            if (openConnection instanceof HttpURLConnection connection) {
-                connection.disconnect();
-            }
-            return fetchedEntry;
-        } catch (IOException e) {
-            throw new FetcherException(doiURL, Localization.lang("Connection error"), e);
-        } catch (ParseException e) {
-            throw new FetcherException(doiURL, "Could not parse BibTeX entry", e);
-        } catch (JSONException e) {
-            throw new FetcherException(doiURL, "Could not retrieve Registration Agency", e);
-        }
+        return fetchedEntry;
     }

     private void doPostCleanup(BibEntry entry) {
-        new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()).cleanup(entry);
-        new FieldFormatterCleanup(StandardField.URL, new ClearFormatter()).cleanup(entry);
-        new FieldFormatterCleanup(StandardField.TITLE, new HtmlToLatexFormatter()).cleanup(entry);
+        NORMALIZE_PAGES.cleanup(entry);
+        CLEAR_URL.cleanup(entry);
+        HTML_TO_LATEX_TITLE.cleanup(entry);
+        entry.trimLeft();
     }

-    private void updateCrossrefAPIRate(URLConnection existingConnection) {
+    private synchronized void updateCrossrefAPIRate(URLConnection existingConnection) {
         try {
             // Assuming this field is given in seconds
             String xRateLimitInterval = existingConnection.getHeaderField("X-Rate-Limit-Interval").replaceAll("[^\\.0123456789]", "");
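
The hunk above replaces the URLDownload.asString/BibtexParser.singleFromString pair with a streaming parse and drops the broad try/catch around the whole method. A hedged sketch of just that parsing step, pulled into a helper for clarity (the helper name is ours, not the commit's; ImportFormatPreferences is assumed to be supplied by the caller, e.g. a mock in tests):

    // Sketch of the new parsing path: hand the HTTP response stream to BibtexParser
    // and keep the first parsed entry, mirroring the added lines above.
    static Optional<BibEntry> parseFirstEntry(InputStream response, ImportFormatPreferences preferences) throws ParseException {
        BibtexParser parser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
        return parser.parseEntries(response).stream().findFirst();
    }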
@@ -221,8 +224,9 @@ public List<BibEntry> performSearch(@NonNull BibEntry entry) throws FetcherExcep
     public Optional<String> getAgency(DOI doi) throws FetcherException, MalformedURLException {
         Optional<String> agency = Optional.empty();
         try {
-            URLDownload download = getUrlDownload(URLUtil.create(DOI.AGENCY_RESOLVER + "/" + URLEncoder.encode(doi.asString(),
-                    StandardCharsets.UTF_8)));
+            URLDownload download = getUrlDownload(
+                    URLUtil.create(DOI.AGENCY_RESOLVER + "/" + URLEncoder.encode(doi.asString(),
+                            StandardCharsets.UTF_8)));
             JSONObject response = new JSONArray(download.asString()).getJSONObject(0);
             if (response != null) {
                 agency = Optional.ofNullable(response.optString("RA"));
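
This hunk only re-wraps the URL construction for the registration-agency lookup. For context, the parsing that follows expects a JSON array whose first object carries an "RA" field naming the agency; the payload below is illustrative, not a recorded response:

    // Illustrative sketch of the response shape the code above expects.
    String payload = "[{\"DOI\":\"10.1103/PhysRevLett.116.061102\",\"RA\":\"Crossref\"}]";
    JSONObject response = new JSONArray(payload).getJSONObject(0);
    Optional<String> agency = Optional.ofNullable(response.optString("RA")); // -> Optional[Crossref]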
@@ -235,18 +239,20 @@ public Optional<String> getAgency(DOI doi) throws FetcherException, MalformedURL
         return agency;
     }

-    private void setPageCountToArticleId(BibEntry entry, String doiAsString) {
+    private void setPageNumbersBasedOnDoi(BibEntry entry, DOI doi) {
+        String doiAsString = doi.asString();
         String articleId = doiAsString.substring(doiAsString.lastIndexOf('.') + 1);
         entry.setField(StandardField.PAGES, articleId);
     }

     // checks if the entry is an APS journal by comparing the organization id and the suffix format
-    private boolean isAPSJournal(BibEntry entry, String doiAsString) {
+    private boolean isAPSJournal(BibEntry entry, DOI doi) {
         if (!entry.getType().equals(StandardEntryType.Article)) {
             return false;
         }
-        String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1);
-        String organizationId = doiAsString.substring(doiAsString.indexOf('.') + 1, doiAsString.indexOf('/'));
+        String doiString = doi.asString();
+        String suffix = doiString.substring(doiString.lastIndexOf('/') + 1);
+        String organizationId = doiString.substring(doiString.indexOf('.') + 1, doiString.indexOf('/'));
         return APS_JOURNAL_ORG_DOI_ID.equals(organizationId) && APS_SUFFIX_PATTERN.matcher(suffix).matches();
     }
 }
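
As a worked example of the substring logic in isAPSJournal and setPageNumbersBasedOnDoi above, applied to a real APS-style DOI (APS_JOURNAL_ORG_DOI_ID and APS_SUFFIX_PATTERN are defined outside this diff; treating "1103" as the APS organization id is an assumption here):

    String doiString = "10.1103/PhysRevLett.116.061102";
    String organizationId = doiString.substring(doiString.indexOf('.') + 1, doiString.indexOf('/')); // "1103"
    String suffix = doiString.substring(doiString.lastIndexOf('/') + 1);                             // "PhysRevLett.116.061102"
    String articleId = doiString.substring(doiString.lastIndexOf('.') + 1);                          // "061102", used as the pages field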