Skip to content

Commit 452fadc

Browse files
committed
#9 - Added URI based stream factories to WarcRecordStreamFactory.
1 parent fc151f4 commit 452fadc

File tree

1 file changed

+35
-7
lines changed

1 file changed

+35
-7
lines changed

java-warc/src/main/java/com/github/bottomlessarchive/warc/service/WarcRecordStreamFactory.java

Lines changed: 35 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
import java.io.BufferedInputStream;
1010
import java.io.IOException;
1111
import java.io.InputStream;
12+
import java.net.MalformedURLException;
13+
import java.net.URI;
1214
import java.net.URL;
1315
import java.nio.charset.Charset;
1416
import java.util.*;
@@ -20,12 +22,38 @@ public class WarcRecordStreamFactory {
2022

2123
// All values of WarcRecordType; passed as the record-type filter by the overloads in this
// class that do not take an explicit list of required record types.
private static final List<WarcRecordType> EVERY_WARC_RECORD_TYPE = Arrays.asList(WarcRecordType.values());
2224

25+
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URI uri) {
26+
try {
27+
return WarcRecordStreamFactory.streamOf(uri.toURL(), EVERY_WARC_RECORD_TYPE);
28+
} catch (MalformedURLException e) {
29+
throw new WarcNetworkException("Unable to parse WARC location: " + uri + "!", e);
30+
}
31+
}
32+
33+
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URI uri,
34+
@NotNull @NonNull final WarcRecordType... requiredRecordTypes) {
35+
try {
36+
return streamOf(uri.toURL(), requiredRecordTypes);
37+
} catch (MalformedURLException e) {
38+
throw new WarcNetworkException("Unable to parse WARC location: " + uri + "!", e);
39+
}
40+
}
41+
42+
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URI uri,
43+
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
44+
try {
45+
return streamOf(uri.toURL(), requiredRecordTypes);
46+
} catch (MalformedURLException e) {
47+
throw new WarcNetworkException("Unable to parse WARC location: " + uri + "!", e);
48+
}
49+
}
50+
2351
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URL url) {
2452
return WarcRecordStreamFactory.streamOf(url, EVERY_WARC_RECORD_TYPE);
2553
}
2654

2755
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URL url,
28-
@NotNull @NonNull final WarcRecordType... requiredRecordTypes) {
56+
@NotNull @NonNull final WarcRecordType... requiredRecordTypes) {
2957
try {
3058
return streamOf(new AvailableInputStream(new BufferedInputStream(url.openStream())),
3159
WarcReader.DEFAULT_CHARSET, true, List.of(requiredRecordTypes));
@@ -35,7 +63,7 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNu
3563
}
3664

3765
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final URL url,
38-
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
66+
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
3967
try {
4068
return streamOf(new AvailableInputStream(new BufferedInputStream(url.openStream())),
4169
WarcReader.DEFAULT_CHARSET, true, requiredRecordTypes);
@@ -49,22 +77,22 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNu
4977
}
5078

5179
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final InputStream warcFileLocation,
52-
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
80+
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
5381
return streamOf(warcFileLocation, WarcReader.DEFAULT_CHARSET, requiredRecordTypes);
5482
}
5583

5684
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final InputStream warcFileLocation,
57-
@NotNull @NonNull final Charset charset) {
85+
@NotNull @NonNull final Charset charset) {
5886
return streamOf(new BufferedInputStream(warcFileLocation), charset, true, EVERY_WARC_RECORD_TYPE);
5987
}
6088

6189
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final InputStream warcFileLocation,
62-
@NotNull @NonNull final Charset charset, @NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
90+
@NotNull @NonNull final Charset charset, @NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
6391
return streamOf(new BufferedInputStream(warcFileLocation), charset, true, requiredRecordTypes);
6492
}
6593

6694
public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(@NotNull @NonNull final InputStream inputStream,
67-
@NotNull @NonNull final Charset charset, final boolean compressed) {
95+
@NotNull @NonNull final Charset charset, final boolean compressed) {
6896
return streamOf(inputStream, charset, compressed, EVERY_WARC_RECORD_TYPE);
6997
}
7098

@@ -75,7 +103,7 @@ public static <T extends WarcContentBlock> Stream<WarcRecord<T>> streamOf(
75103
@NotNull @NonNull final List<WarcRecordType> requiredRecordTypes) {
76104
final WarcReader warcReader = new WarcReader(inputStream, charset, compressed);
77105
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
78-
new SafeWarcRecordIterator(warcReader), Spliterator.ORDERED | Spliterator.NONNULL), false)
106+
new SafeWarcRecordIterator(warcReader), Spliterator.ORDERED | Spliterator.NONNULL), false)
79107
.filter(warcRecord -> requiredRecordTypes.contains(warcRecord.getType()))
80108
.map(warcRecord -> ((WarcRecord<T>) warcRecord));
81109
}

0 commit comments

Comments
 (0)