From 7ebd473bfba6771ae251146ebd0c09e3a4e01aba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martynas=20Jusevi=C4=8Dius?= Date: Mon, 16 Mar 2026 01:21:10 +0100 Subject: [PATCH] Add initial test suite (19 tests) and upgrade dependencies Adds ModelTransformerTest, CSVStreamRDFProcessorTest and CSVStreamRDFOutputTest covering SPARQL CONSTRUCT transformation, IRI percent-encoding, literal datatypes, null cell/header skipping, counter resets, delimiter handling, and maxCharsPerColumn enforcement. Also upgrades Jena to 6.0.0, picocli to 4.7.7, and various Maven plugin versions; adds JUnit Jupiter 5.11.4 and Mockito 5.14.2 test dependencies; migrates publishing from nexus-staging to central-publishing-maven-plugin. Co-Authored-By: Claude Sonnet 4.6 --- pom.xml | 49 +++-- .../etl/csv/ModelTransformerTest.java | 85 ++++++++ .../csv/stream/CSVStreamRDFOutputTest.java | 93 ++++++++ .../csv/stream/CSVStreamRDFProcessorTest.java | 204 ++++++++++++++++++ 4 files changed, 415 insertions(+), 16 deletions(-) create mode 100644 src/test/java/com/atomgraph/etl/csv/ModelTransformerTest.java create mode 100644 src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFOutputTest.java create mode 100644 src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFProcessorTest.java diff --git a/pom.xml b/pom.xml index 1beff1a..a101e9b 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.atomgraph.etl.csv csv2rdf - 2.1.12-SNAPSHOT + 2.2.0-SNAPSHOT jar CSV2RDF @@ -41,6 +41,18 @@ + + org.junit.jupiter + junit-jupiter + 5.11.4 + test + + + org.mockito + mockito-junit-jupiter + 5.14.2 + test + org.slf4j slf4j-simple @@ -49,7 +61,7 @@ org.apache.jena jena-arq - 4.7.0 + 6.0.0 com.univocity @@ -60,17 +72,22 @@ info.picocli picocli - 4.0.4 + 4.7.7 csv2rdf + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.2 + org.apache.maven.plugins maven-compiler-plugin - 3.10.0 + 3.14.1 17 @@ -78,7 +95,7 @@ org.apache.maven.plugins maven-release-plugin - 2.5.3 + 3.3.1 release @@ -112,21 +129,21 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.12.0 UTF-8 
*.impl - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.12 + org.sonatype.central + central-publishing-maven-plugin + 0.9.0 true - ossrh - https://oss.sonatype.org/ - true + central-portal-snapshots + true + published @@ -134,8 +151,8 @@ - ossrh - https://oss.sonatype.org/content/repositories/snapshots + central-portal-snapshots + https://central.sonatype.com/repository/maven-snapshots/ @@ -147,7 +164,7 @@ org.apache.maven.plugins maven-source-plugin - 3.1.0 + 3.4.0 attach-sources @@ -160,7 +177,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.12.0 UTF-8 *.impl diff --git a/src/test/java/com/atomgraph/etl/csv/ModelTransformerTest.java b/src/test/java/com/atomgraph/etl/csv/ModelTransformerTest.java new file mode 100644 index 0000000..9722a40 --- /dev/null +++ b/src/test/java/com/atomgraph/etl/csv/ModelTransformerTest.java @@ -0,0 +1,85 @@ +/** + * Copyright 2026 Martynas Jusevičius + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package com.atomgraph.etl.csv; + +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.vocabulary.RDF; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class ModelTransformerTest +{ + + private final ModelTransformer transformer = new ModelTransformer(); + + @Test + void applyIdentityConstructReturnsIsomorphicModel() + { + Model input = ModelFactory.createDefaultModel(); + input.add( + input.createResource("http://example.com/s"), + RDF.type, + input.createResource("http://example.com/Type") + ); + + Query query = QueryFactory.create("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"); + Model result = transformer.apply(query, input); + + assertTrue(result.isIsomorphicWith(input)); + } + + @Test + void applyEmptyInputReturnsEmptyModel() + { + Model input = ModelFactory.createDefaultModel(); + Query query = QueryFactory.create("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"); + + Model result = transformer.apply(query, input); + + assertTrue(result.isEmpty()); + } + + @Test + void applyFilteringConstructReturnsSubset() + { + Model input = ModelFactory.createDefaultModel(); + input.add( + input.createResource("http://example.com/s"), + input.createProperty("http://example.com/#name"), + "Alice" + ); + input.add( + input.createResource("http://example.com/s"), + input.createProperty("http://example.com/#age"), + "30" + ); + + // Only map name, not age + Query query = QueryFactory.create( + "PREFIX ex: <http://example.com/#> " + + "CONSTRUCT { ?s ex:name ?name } WHERE { ?s ex:name ?name }" + ); + Model result = transformer.apply(query, input); + + assertEquals(1, result.size()); + } + +} diff --git a/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFOutputTest.java b/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFOutputTest.java new file mode 100644 index 0000000..3221616 --- /dev/null +++ 
b/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFOutputTest.java @@ -0,0 +1,93 @@ +/** + * Copyright 2026 Martynas Jusevičius + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.atomgraph.etl.csv.stream; + +import com.univocity.parsers.common.TextParsingException; +import java.io.StringReader; +import java.io.StringWriter; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class CSVStreamRDFOutputTest +{ + + private static final String BASE = "http://example.com/"; + private static final Query IDENTITY_QUERY = QueryFactory.create("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"); + + @Test + void writeProducesNonEmptyRDFOutput() + { + String csv = "name,age\nAlice,30\nBob,25\n"; + CSVStreamRDFOutput output = new CSVStreamRDFOutput(new StringReader(csv), BASE, IDENTITY_QUERY, ',', null); + + StringWriter writer = new StringWriter(); + output.write(writer); + + assertFalse(writer.toString().isBlank()); + } + + @Test + void writeWithCustomDelimiterParsesCorrectly() + { + String csv = "name;age\nAlice;30\n"; + CSVStreamRDFOutput output = new CSVStreamRDFOutput(new StringReader(csv), BASE, IDENTITY_QUERY, ';', null); + + output.write(new StringWriter()); + + assertEquals(1, output.getCSVStreamRDFProcessor().getSubjectCount()); + assertEquals(2, output.getCSVStreamRDFProcessor().getTripleCount()); + } + + @Test + 
void writeTwoRowsCountsSubjectsCorrectly() + { + String csv = "name,age\nAlice,30\nBob,25\n"; + CSVStreamRDFOutput output = new CSVStreamRDFOutput(new StringReader(csv), BASE, IDENTITY_QUERY, ',', null); + + output.write(new StringWriter()); + + assertEquals(2, output.getCSVStreamRDFProcessor().getSubjectCount()); + assertEquals(4, output.getCSVStreamRDFProcessor().getTripleCount()); + } + + @Test + void headerOnlyProducesEmptyOutput() + { + String csv = "name,age\n"; + CSVStreamRDFOutput output = new CSVStreamRDFOutput(new StringReader(csv), BASE, IDENTITY_QUERY, ',', null); + + output.write(new StringWriter()); + + assertEquals(0, output.getCSVStreamRDFProcessor().getSubjectCount()); + assertEquals(0, output.getCSVStreamRDFProcessor().getTripleCount()); + } + + @Test + void maxCharsPerColumnThrowsOnLongValue() + { + // "name" header (4 chars) fits within maxCharsPerColumn=5; + // the data value "Hello World" (11 chars) exceeds it and must cause a TextParsingException + String csv = "name\nHello World\n"; + CSVStreamRDFOutput output = new CSVStreamRDFOutput(new StringReader(csv), BASE, IDENTITY_QUERY, ',', 5); + + assertThrows(TextParsingException.class, () -> output.write(new StringWriter())); + } + +} diff --git a/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFProcessorTest.java b/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFProcessorTest.java new file mode 100644 index 0000000..9f630d0 --- /dev/null +++ b/src/test/java/com/atomgraph/etl/csv/stream/CSVStreamRDFProcessorTest.java @@ -0,0 +1,204 @@ +/** + * Copyright 2026 Martynas Jusevičius + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.atomgraph.etl.csv.stream; + +import com.univocity.parsers.common.ParsingContext; +import java.io.StringWriter; +import org.apache.jena.datatypes.xsd.XSDDatatype; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.rdf.model.Literal; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFLib; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +public class CSVStreamRDFProcessorTest +{ + + private static final String BASE = "http://example.com/"; + private static final Query IDENTITY_QUERY = QueryFactory.create("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"); + + @Mock + private ParsingContext context; + + private final StreamRDF stream = StreamRDFLib.writer(new StringWriter()); + + @Test + void constructorRejectsSelectQuery() + { + Query selectQuery = QueryFactory.create("SELECT * WHERE { ?s ?p ?o }"); + assertThrows(IllegalArgumentException.class, () -> new CSVStreamRDFProcessor(stream, BASE, selectQuery)); + } + + @Test + void transformRowCreatesTriples() + { + when(context.headers()).thenReturn(new String[]{"name", "age"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, 
IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"Alice", "30"}, context); + + assertEquals(2, result.size()); + assertEquals(1, processor.getSubjectCount()); + assertEquals(2, processor.getTripleCount()); + } + + @Test + void transformRowSkipsNullCells() + { + when(context.headers()).thenReturn(new String[]{"name", "age"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{null, "30"}, context); + + assertEquals(1, result.size()); + assertEquals(1, processor.getTripleCount()); + } + + @Test + void transformRowSkipsNullHeader() + { + when(context.headers()).thenReturn(new String[]{"name", null}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"Alice", "30"}, context); + + assertEquals(1, result.size()); + assertEquals(1, processor.getTripleCount()); + } + + @Test + void processStartedResetsCounters() + { + when(context.headers()).thenReturn(new String[]{"name"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + processor.transformRow(new String[]{"Alice"}, context); + assertEquals(1, processor.getSubjectCount()); + + processor.processStarted(context); + assertEquals(0, processor.getSubjectCount()); + assertEquals(0, processor.getTripleCount()); + } + + @Test + void propertyIriUsesBaseAndEncodedHeader() + { + when(context.headers()).thenReturn(new String[]{"name"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"Alice"}, context); + + Statement stmt = result.listStatements().nextStatement(); + 
assertEquals("http://example.com/#name", stmt.getPredicate().getURI()); + } + + @Test + void headerWithSpacesIsPercentEncoded() + { + when(context.headers()).thenReturn(new String[]{"first name"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"Alice"}, context); + + Statement stmt = result.listStatements().nextStatement(); + assertTrue(stmt.getPredicate().getURI().contains("first%20name")); + } + + @Test + void headerWithHashIsPercentEncoded() + { + when(context.headers()).thenReturn(new String[]{"#col"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"val"}, context); + + Statement stmt = result.listStatements().nextStatement(); + assertTrue(stmt.getPredicate().getURI().contains("%23col")); + } + + @Test + void headerWithSlashIsPercentEncoded() + { + when(context.headers()).thenReturn(new String[]{"a/b"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"val"}, context); + + Statement stmt = result.listStatements().nextStatement(); + assertTrue(stmt.getPredicate().getURI().contains("a%2Fb")); + } + + @Test + void cellValueIsPlainStringLiteral() + { + when(context.headers()).thenReturn(new String[]{"name"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"Alice"}, context); + + Literal lit = result.listStatements().nextStatement().getLiteral(); + assertEquals("Alice", lit.getString()); + assertTrue(lit.getLanguage().isEmpty()); + assertTrue(lit.getDatatypeURI() == null + || 
lit.getDatatypeURI().equals(XSDDatatype.XSDstring.getURI())); + } + + @Test + void numericCellIsStillStringLiteral() + { + when(context.headers()).thenReturn(new String[]{"age"}); + + CSVStreamRDFProcessor processor = new CSVStreamRDFProcessor(stream, BASE, IDENTITY_QUERY); + processor.processStarted(context); + + Model result = processor.transformRow(new String[]{"42"}, context); + + Literal lit = result.listStatements().nextStatement().getLiteral(); + assertEquals("42", lit.getString()); + assertTrue(lit.getLanguage().isEmpty()); + assertTrue(lit.getDatatypeURI() == null + || lit.getDatatypeURI().equals(XSDDatatype.XSDstring.getURI())); + } + +}