Skip to content

Commit 6aa8123

Browse files
committed
fix(dataformat): ignore empty XML elements that contain only whitespace
1 parent 79deb60 commit 6aa8123

File tree

2 files changed

+63
-5
lines changed

2 files changed

+63
-5
lines changed

connect-file-pulse-dataformat/src/main/java/io/streamthoughts/kafka/connect/filepulse/xml/XMLNodeToStructConverter.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io.streamthoughts.kafka.connect.filepulse.data.TypedField;
2626
import io.streamthoughts.kafka.connect.filepulse.data.TypedStruct;
2727
import io.streamthoughts.kafka.connect.filepulse.data.TypedValue;
28+
import io.streamthoughts.kafka.connect.filepulse.internal.StringUtils;
2829
import io.streamthoughts.kafka.connect.filepulse.reader.ReaderException;
2930
import org.slf4j.Logger;
3031
import org.slf4j.LoggerFactory;
@@ -161,7 +162,7 @@ private TypedValue convertObjectTree(final Node node,
161162
TypedStruct container = TypedStruct.create();
162163
getNotExcludedNodeAttributes(node).forEach(container::put);
163164
for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
164-
// Text nodes always return #text" as the node name, so it's best to use the parent node name instead.
165+
// Text nodes always return #text as the node name, so it's best to use the parent node name instead.
165166
final String childNodeName = isTextNode(child) ? nodeName : determineNodeName(child);
166167
Optional<TypedValue> optional = readObjectNodeValue(
167168
child,
@@ -309,10 +310,10 @@ private static Optional<String> peekChildCDataNodeTextValue(final Node node) {
309310

310311
if (nonNewLineNodes.size() == 1) {
311312
final Node child = nonNewLineNodes.get(0);
312-
if (isTextNode(child)) {
313-
// Text content can be an empty string.
313+
if (isWhitespaceOrNewLineNodeElement(child))
314+
return Optional.empty();
315+
if (isTextNode(child))
314316
return Optional.of(child.getTextContent());
315-
}
316317
}
317318

318319
return Optional.empty();
@@ -347,7 +348,7 @@ private static boolean isNotXmlNamespace(final Node node) {
347348
}
348349

349350
private static boolean isWhitespaceOrNewLineNodeElement(final Node node) {
350-
return node != null && isTextNode(node) && node.getTextContent().trim().isEmpty();
351+
return node != null && isTextNode(node) && StringUtils.isBlank(node.getTextContent());
351352
}
352353

353354
private String determineNodeName(final Node node) {

connect-file-pulse-dataformat/src/test/java/io/streamthoughts/kafka/connect/filepulse/xml/XMLNodeToStructConverterTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,4 +132,61 @@ public void should_convert_given_single_text_node_element_with_attrs() throws Ex
132132
Assert.assertEquals("test", root.getString("text"));
133133
Assert.assertEquals("attr", root.getString("attr"));
134134
}
135+
136+
@Test
137+
public void should_ignore_element_given_xml_tag_with_whitespaces() throws Exception {
138+
// Given
139+
final byte[] bytes = "<root><empty> </empty><data>text</data></root>".getBytes();
140+
final XMLNodeToStructConverter converter = new XMLNodeToStructConverter()
141+
.setContentFieldName("text")
142+
.setExcludeEmptyElement(true);
143+
144+
// When
145+
final Document document = reader.parse(new ByteArrayInputStream(bytes));
146+
TypedStruct struct = converter.apply(document);
147+
148+
// Then
149+
Assert.assertNotNull(struct);
150+
TypedStruct root = struct.getStruct("root");
151+
Assert.assertNotNull(root);
152+
Assert.assertFalse(root.has("empty"));
153+
}
154+
155+
@Test
156+
public void should_ignore_empty_element_given_self_closing_xml_tag() throws Exception {
157+
// Given
158+
final byte[] bytes = "<root><empty/><data>text</data></root>".getBytes();
159+
final XMLNodeToStructConverter converter = new XMLNodeToStructConverter()
160+
.setContentFieldName("text")
161+
.setExcludeEmptyElement(true);
162+
163+
// When
164+
final Document document = reader.parse(new ByteArrayInputStream(bytes));
165+
TypedStruct struct = converter.apply(document);
166+
167+
// Then
168+
Assert.assertNotNull(struct);
169+
TypedStruct root = struct.getStruct("root");
170+
Assert.assertNotNull(root);
171+
Assert.assertFalse(root.has("empty"));
172+
}
173+
174+
@Test
175+
public void should_ignore_empty_element_given_open_and_close_xml_tags() throws Exception {
176+
// Given
177+
final byte[] bytes = "<root><empty></empty><data>text</data></root>".getBytes();
178+
final XMLNodeToStructConverter converter = new XMLNodeToStructConverter()
179+
.setContentFieldName("text")
180+
.setExcludeEmptyElement(true);
181+
182+
// When
183+
final Document document = reader.parse(new ByteArrayInputStream(bytes));
184+
TypedStruct struct = converter.apply(document);
185+
186+
// Then
187+
Assert.assertNotNull(struct);
188+
TypedStruct root = struct.getStruct("root");
189+
Assert.assertNotNull(root);
190+
Assert.assertFalse(root.has("empty"));
191+
}
135192
}

0 commit comments

Comments
 (0)