99import java .io .BufferedInputStream ;
1010import java .io .IOException ;
1111import java .io .InputStream ;
12+ import java .net .MalformedURLException ;
13+ import java .net .URI ;
1214import java .net .URL ;
1315import java .nio .charset .Charset ;
1416import java .util .*;
@@ -20,12 +22,38 @@ public class WarcRecordStreamFactory {
2022
// All values of WarcRecordType, wrapped once as a list; the streamOf overloads that take no
// explicit record-type argument pass this list as their record-type filter.
2123 private static final List <WarcRecordType > EVERY_WARC_RECORD_TYPE = Arrays .asList (WarcRecordType .values ());
2224
25+ public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URI uri ) {
26+ try {
27+ return WarcRecordStreamFactory .streamOf (uri .toURL (), EVERY_WARC_RECORD_TYPE );
28+ } catch (MalformedURLException e ) {
29+ throw new WarcNetworkException ("Unable to parse WARC location: " + uri + "!" , e );
30+ }
31+ }
32+
33+ public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URI uri ,
34+ @ NotNull @ NonNull final WarcRecordType ... requiredRecordTypes ) {
35+ try {
36+ return streamOf (uri .toURL (), requiredRecordTypes );
37+ } catch (MalformedURLException e ) {
38+ throw new WarcNetworkException ("Unable to parse WARC location: " + uri + "!" , e );
39+ }
40+ }
41+
42+ public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URI uri ,
43+ @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
44+ try {
45+ return streamOf (uri .toURL (), requiredRecordTypes );
46+ } catch (MalformedURLException e ) {
47+ throw new WarcNetworkException ("Unable to parse WARC location: " + uri + "!" , e );
48+ }
49+ }
50+
2351 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URL url ) {
2452 return WarcRecordStreamFactory .streamOf (url , EVERY_WARC_RECORD_TYPE );
2553 }
2654
2755 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URL url ,
28- @ NotNull @ NonNull final WarcRecordType ... requiredRecordTypes ) {
56+ @ NotNull @ NonNull final WarcRecordType ... requiredRecordTypes ) {
2957 try {
3058 return streamOf (new AvailableInputStream (new BufferedInputStream (url .openStream ())),
3159 WarcReader .DEFAULT_CHARSET , true , List .of (requiredRecordTypes ));
@@ -35,7 +63,7 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNu
3563 }
3664
3765 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final URL url ,
38- @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
66+ @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
3967 try {
4068 return streamOf (new AvailableInputStream (new BufferedInputStream (url .openStream ())),
4169 WarcReader .DEFAULT_CHARSET , true , requiredRecordTypes );
@@ -49,22 +77,22 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNu
4977 }
5078
5179 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final InputStream warcFileLocation ,
52- @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
80+ @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
5381 return streamOf (warcFileLocation , WarcReader .DEFAULT_CHARSET , requiredRecordTypes );
5482 }
5583
5684 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final InputStream warcFileLocation ,
57- @ NotNull @ NonNull final Charset charset ) {
85+ @ NotNull @ NonNull final Charset charset ) {
5886 return streamOf (new BufferedInputStream (warcFileLocation ), charset , true , EVERY_WARC_RECORD_TYPE );
5987 }
6088
6189 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final InputStream warcFileLocation ,
62- @ NotNull @ NonNull final Charset charset , @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
90+ @ NotNull @ NonNull final Charset charset , @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
6391 return streamOf (new BufferedInputStream (warcFileLocation ), charset , true , requiredRecordTypes );
6492 }
6593
6694 public static <T extends WarcContentBlock > Stream <WarcRecord <T >> streamOf (@ NotNull @ NonNull final InputStream inputStream ,
67- @ NotNull @ NonNull final Charset charset , final boolean compressed ) {
95+ @ NotNull @ NonNull final Charset charset , final boolean compressed ) {
6896 return streamOf (inputStream , charset , compressed , EVERY_WARC_RECORD_TYPE );
6997 }
7098
@@ -75,7 +103,7 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(
75103 @ NotNull @ NonNull final List <WarcRecordType > requiredRecordTypes ) {
76104 final WarcReader warcReader = new WarcReader (inputStream , charset , compressed );
77105 return StreamSupport .stream (Spliterators .spliteratorUnknownSize (
78- new SafeWarcRecordIterator (warcReader ), Spliterator .ORDERED | Spliterator .NONNULL ), false )
106+ new SafeWarcRecordIterator (warcReader ), Spliterator .ORDERED | Spliterator .NONNULL ), false )
79107 .filter (warcRecord -> requiredRecordTypes .contains (warcRecord .getType ()))
80108 .map (warcRecord -> ((WarcRecord <T >) warcRecord ));
81109 }
0 commit comments