From 44b8af9366eeb63bcfc0e38c8929421bcd8ccb95 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 15:30:11 -0700 Subject: [PATCH 1/4] test(amber): add unit test coverage for LogicalLink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin both construction paths and the production Jackson behavior: - Primary case-class constructor: four fields exposed as constructed. - Secondary @JsonCreator constructor: wraps raw String op ids in OperatorIdentity (the Jackson deserialization path used for user-saved workflow JSON). - Case-class equals / hashCode across the four fields, including field-discriminating tests (fromOpId, toPortId.internal) and acceptance of self-loop links (same fromOpId / toOpId, distinct ports — higher layers reject cycles, the data type does not). - @JsonCreator path accepts dashes / dots / digits and the empty string without normalization (no validation in the data type). - Production-mapper Jackson coverage via JSONUtils.objectMapper: - treeToValue with raw String op-id JSON dispatches to the @JsonCreator string overload and produces the expected fields. - @JsonProperty pins the on-the-wire key names (`fromOpId` / `toOpId` / `fromPortId` / `toPortId`). - writeValueAsString → readValue is NOT round-trippable: the string overload can't accept the object-shape OperatorIdentity that writeValueAsString emits. Asymmetry pinned so a future fix (object @JsonCreator overload or custom @JsonDeserialize) flips this on purpose. - Missing string op-id fields silently default to OperatorIdentity(null) — no validation in the @JsonCreator path; future hardening should fail this on purpose. Closes #4955 --- .../texera/workflow/LogicalLinkSpec.scala | 221 ++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala new file mode 100644 index 00000000000..aa8576f1c98 --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.workflow + +import com.fasterxml.jackson.databind.exc.MismatchedInputException +import com.fasterxml.jackson.databind.node.ObjectNode +import org.apache.texera.amber.core.virtualidentity.OperatorIdentity +import org.apache.texera.amber.core.workflow.PortIdentity +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec + +class LogicalLinkSpec extends AnyFlatSpec { + + // --------------------------------------------------------------------------- + // Primary constructor + case-class semantics + // --------------------------------------------------------------------------- + + "LogicalLink primary constructor" should "expose the four fields it was constructed with" in { + val link = LogicalLink( + fromOpId = OperatorIdentity("op-A"), + fromPortId = PortIdentity(0), + toOpId = OperatorIdentity("op-B"), + toPortId = PortIdentity(1, internal = true) + ) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.fromPortId == PortIdentity(0)) + assert(link.toOpId == OperatorIdentity("op-B")) + assert(link.toPortId == PortIdentity(1, internal = true)) + } + + "LogicalLink case-class equality" should "use structural equality across all four fields" in { + val a = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + val b = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + assert(a == b) + assert(a.hashCode == b.hashCode) + } + + it should "distinguish links that differ only in fromOpId" in { + val a = + LogicalLink(OperatorIdentity("x"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + val b = + LogicalLink(OperatorIdentity("z"), PortIdentity(0), OperatorIdentity("y"), PortIdentity(1)) + assert(a != b) + } + + it should "distinguish links that differ only in toPortId.internal" in { + val a = LogicalLink( + OperatorIdentity("x"), + PortIdentity(0), + OperatorIdentity("y"), + PortIdentity(1, internal = false) + ) + val b = LogicalLink( + OperatorIdentity("x"), + PortIdentity(0), + OperatorIdentity("y"), + PortIdentity(1, internal = true) + ) + assert(a != b) + } + + it should "consider a self-loop link well-formed (same fromOpId / toOpId, distinct ports)" in { + // Self-loops aren't structurally invalid at the LogicalLink level — + // higher layers reject cycles, but the data type allows fromOpId == + // toOpId. Pin so a future == check on construction breaks this on + // purpose. + val selfLoop = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-A"), + PortIdentity(1) + ) + assert(selfLoop.fromOpId == selfLoop.toOpId) + } + + // --------------------------------------------------------------------------- + // Secondary @JsonCreator constructor (string opId variant) + // --------------------------------------------------------------------------- + + "LogicalLink secondary @JsonCreator constructor" should "wrap raw String op ids in OperatorIdentity" in { + val link = new LogicalLink( + fromOpId = "op-A", + fromPortId = PortIdentity(0), + toOpId = "op-B", + toPortId = PortIdentity(1) + ) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.toOpId == OperatorIdentity("op-B")) + // Equal to a link built via the primary constructor. + assert( + link == LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + ) + } + + it should "accept identifiers containing dashes / dots / digits (no normalization)" in { + val link = new LogicalLink("my.op-1", PortIdentity(0), "my.op-2", PortIdentity(1)) + assert(link.fromOpId == OperatorIdentity("my.op-1")) + assert(link.toOpId == OperatorIdentity("my.op-2")) + } + + it should "accept the empty string as an op id (no validation in the data type)" in { + // Pin: the secondary constructor does not validate; an empty string + // wraps into `OperatorIdentity("")`. A future change adding non-empty + // validation should fail this test on purpose. + val link = new LogicalLink("", PortIdentity(0), "", PortIdentity(1)) + assert(link.fromOpId == OperatorIdentity("")) + assert(link.toOpId == OperatorIdentity("")) + } + + // --------------------------------------------------------------------------- + // Jackson round-trip (production objectMapper) + // --------------------------------------------------------------------------- + // + // These tests use the same `JSONUtils.objectMapper` that production uses + // to read user-saved workflow JSON, so a regression in the Jackson + // wiring (annotations, default-Scala-module config) surfaces here. + + "LogicalLink Jackson deserialization" should + "deserialize fromOpId / toOpId from raw String values via the secondary @JsonCreator constructor" in { + // Build the JSON by hand to mimic a user-saved workflow file where + // `fromOpId` and `toOpId` are written as plain strings (the only shape + // production actually receives, since the frontend emits them as + // strings). Jackson dispatches to the @JsonCreator string-overload + // constructor. + val node = objectMapper.createObjectNode() + node.put("fromOpId", "op-A") + node.set[ObjectNode]("fromPortId", objectMapper.valueToTree(PortIdentity(0))) + node.put("toOpId", "op-B") + node.set[ObjectNode]("toPortId", objectMapper.valueToTree(PortIdentity(1))) + val link = objectMapper.treeToValue(node, classOf[LogicalLink]) + assert(link.fromOpId == OperatorIdentity("op-A")) + assert(link.toOpId == OperatorIdentity("op-B")) + assert(link.fromPortId == PortIdentity(0)) + assert(link.toPortId == PortIdentity(1)) + } + + it should "use the documented `fromOpId` / `toOpId` JSON field names on serialization" in { + // The `@JsonProperty` annotations pin the on-the-wire key names, which + // saved workflow files depend on. A renamed Scala field would + // silently break a project's existing JSON if these annotations were + // removed. + val link = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val tree = objectMapper.valueToTree[com.fasterxml.jackson.databind.JsonNode](link) + assert(tree.has("fromOpId")) + assert(tree.has("toOpId")) + assert(tree.has("fromPortId")) + assert(tree.has("toPortId")) + } + + it should "NOT round-trip through writeValueAsString (the @JsonCreator string overload is incompatible with the object-shape OperatorIdentity that writeValueAsString emits)" in { + // This is a real asymmetry worth pinning: production reads user-saved + // workflow JSON where `fromOpId`/`toOpId` are plain strings, but + // `objectMapper.writeValueAsString` writes OperatorIdentity as + // `{"id":"op-A"}` (the case-class object form). Re-reading that + // emitted JSON fails because Jackson dispatches on the @JsonCreator + // string-overload, which can't accept an object for fromOpId. + // A future fix that adds a third @JsonCreator (object overload) or + // a custom @JsonDeserialize on `fromOpId`/`toOpId` will turn this + // into a passing round-trip and break this assertion on purpose. + val original = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val json = objectMapper.writeValueAsString(original) + // Confirm writeValueAsString emits the object form. + assert(json.contains("\"fromOpId\":{\"id\":\"op-A\"}")) + // Re-reading fails because the @JsonCreator string overload can't + // accept an object for fromOpId. + intercept[MismatchedInputException] { + objectMapper.readValue(json, classOf[LogicalLink]) + } + } + + it should "silently default missing string op-id fields to OperatorIdentity(null) (no validation in the @JsonCreator path)" in { + // Pin: omitting `fromOpId` / `toOpId` does not throw — Jackson invokes + // the @JsonCreator with `null` for the missing String args, and + // `OperatorIdentity(null)` is a perfectly constructable case-class + // instance. A future change adding a non-null check at construction + // time should fail this on purpose. + val empty = objectMapper.createObjectNode() + val link = objectMapper.treeToValue(empty, classOf[LogicalLink]) + assert(link.fromOpId == OperatorIdentity(null.asInstanceOf[String])) + assert(link.toOpId == OperatorIdentity(null.asInstanceOf[String])) + // PortIdentity fields default to null too (the @JsonCreator does not + // supply scalapb defaults for missing object fields — Jackson passes + // null for the typed PortIdentity arg). + assert(link.fromPortId == null) + assert(link.toPortId == null) + } +} From 24b7e37c95607299fbe67251e4b14a1a034f5914 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 15:44:19 -0700 Subject: [PATCH 2/4] test(amber): tighten LogicalLinkSpec Jackson assertions per Copilot review Address Copilot feedback on #4956: - Replace the brittle `json.contains(...)` raw-string check on the writeValueAsString output with `objectMapper.readTree(json)` parse + structural assertions (`tree.path("fromOpId").isObject` / `path("id").asText() == "op-A"`). No longer depends on exact key ordering or escaping. - Drop the `node.set[ObjectNode](..., objectMapper.valueToTree(...))` pattern. The `[ObjectNode]` type parameter forces a runtime cast that assumes PortIdentity always serializes to an ObjectNode, which could throw ClassCastException if PortIdentity's serialization changes. Use `node.set("fromPortId", objectMapper.valueToTree[ JsonNode](...))` instead. - Replace `OperatorIdentity(null.asInstanceOf[String])` with the cleaner `link.fromOpId.id == null` assertion. Same intent (verify the op-id field is null after deserialization with missing JSON field), no `null.asInstanceOf[String]` noise. --- .../texera/workflow/LogicalLinkSpec.scala | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index aa8576f1c98..c860456e6e2 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -19,8 +19,8 @@ package org.apache.texera.workflow +import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.exc.MismatchedInputException -import com.fasterxml.jackson.databind.node.ObjectNode import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity import org.apache.texera.amber.util.JSONUtils.objectMapper @@ -148,9 +148,9 @@ class LogicalLinkSpec extends AnyFlatSpec { // constructor. val node = objectMapper.createObjectNode() node.put("fromOpId", "op-A") - node.set[ObjectNode]("fromPortId", objectMapper.valueToTree(PortIdentity(0))) + node.set("fromPortId", objectMapper.valueToTree[JsonNode](PortIdentity(0))) node.put("toOpId", "op-B") - node.set[ObjectNode]("toPortId", objectMapper.valueToTree(PortIdentity(1))) + node.set("toPortId", objectMapper.valueToTree[JsonNode](PortIdentity(1))) val link = objectMapper.treeToValue(node, classOf[LogicalLink]) assert(link.fromOpId == OperatorIdentity("op-A")) assert(link.toOpId == OperatorIdentity("op-B")) @@ -169,7 +169,7 @@ class LogicalLinkSpec extends AnyFlatSpec { OperatorIdentity("op-B"), PortIdentity(1) ) - val tree = objectMapper.valueToTree[com.fasterxml.jackson.databind.JsonNode](link) + val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromOpId")) assert(tree.has("toOpId")) assert(tree.has("fromPortId")) @@ -193,10 +193,14 @@ class LogicalLinkSpec extends AnyFlatSpec { PortIdentity(1) ) val json = objectMapper.writeValueAsString(original) - // Confirm writeValueAsString emits the object form. - assert(json.contains("\"fromOpId\":{\"id\":\"op-A\"}")) - // Re-reading fails because the @JsonCreator string overload can't - // accept an object for fromOpId. + // Parse the emitted JSON and confirm the structural shape — fromOpId + // is an object with an `id` field of "op-A". Avoids depending on + // exact key ordering or escaping. + val tree = objectMapper.readTree(json) + assert(tree.path("fromOpId").isObject, s"expected fromOpId to be an object: $json") + assert(tree.path("fromOpId").path("id").asText() == "op-A") + // Re-reading the just-emitted JSON fails because the @JsonCreator + // String overload can't accept the object-shape fromOpId. intercept[MismatchedInputException] { objectMapper.readValue(json, classOf[LogicalLink]) } @@ -210,8 +214,8 @@ class LogicalLinkSpec extends AnyFlatSpec { // time should fail this on purpose. val empty = objectMapper.createObjectNode() val link = objectMapper.treeToValue(empty, classOf[LogicalLink]) - assert(link.fromOpId == OperatorIdentity(null.asInstanceOf[String])) - assert(link.toOpId == OperatorIdentity(null.asInstanceOf[String])) + assert(link.fromOpId.id == null) + assert(link.toOpId.id == null) // PortIdentity fields default to null too (the @JsonCreator does not // supply scalapb defaults for missing object fields — Jackson passes // null for the typed PortIdentity arg). From e1f706fab23e977ac3d3b3200c9b528fe390545b Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 5 May 2026 16:18:15 -0700 Subject: [PATCH 3/4] test(amber): tighten @JsonProperty pin + separate parameter-name pin Address Copilot follow-up on #4956: - The @JsonProperty pin previously claimed "all four fields" but only fromOpId/toOpId actually carry the annotation. Split into two tests: one pinning the @JsonProperty-annotated op-id keys, and a separate one explicitly pinning that fromPortId/toPortId derive from Scala parameter names (no annotation). The second test makes the parameter-rename failure mode visible: a parameter rename without an accompanying @JsonProperty annotation would silently break saved workflows. The "writeValueAsString does NOT round-trip" thread is reconciled by updating the linked issue (#4955) body to clarify that the spec characterizes the current asymmetry rather than asserting a passing round-trip; the follow-up fix that adds a custom JsonDeserialize for fromOpId/toOpId would flip the characterization-pin into a real round-trip. --- .../texera/workflow/LogicalLinkSpec.scala | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index c860456e6e2..b81f1a89fad 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -158,11 +158,11 @@ class LogicalLinkSpec extends AnyFlatSpec { assert(link.toPortId == PortIdentity(1)) } - it should "use the documented `fromOpId` / `toOpId` JSON field names on serialization" in { - // The `@JsonProperty` annotations pin the on-the-wire key names, which - // saved workflow files depend on. A renamed Scala field would - // silently break a project's existing JSON if these annotations were - // removed. + it should "emit `fromOpId` / `toOpId` JSON keys pinned by @JsonProperty annotations" in { + // Only `fromOpId` / `toOpId` carry `@JsonProperty` in `LogicalLink`; + // a Scala-side rename of either parameter would still keep the + // JSON key stable, which is the saved-workflow contract these + // annotations pin. val link = LogicalLink( OperatorIdentity("op-A"), PortIdentity(0), @@ -172,6 +172,21 @@ class LogicalLinkSpec extends AnyFlatSpec { val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromOpId")) assert(tree.has("toOpId")) + } + + it should "emit `fromPortId` / `toPortId` JSON keys derived from Scala parameter names (no @JsonProperty)" in { + // Pin: the port-id JSON keys come from Scala parameter names since + // there is no `@JsonProperty` annotation on those fields. A + // parameter rename WOULD silently break saved-workflow compatibility + // for these keys — pin so a future rename without an accompanying + // `@JsonProperty` annotation breaks this on purpose. + val link = LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + val tree = objectMapper.valueToTree[JsonNode](link) assert(tree.has("fromPortId")) assert(tree.has("toPortId")) } From cd5bb1713800079b543b967f26a2becb2e53401b Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 12 May 2026 19:39:18 -0700 Subject: [PATCH 4/4] feat(amber): reject self-loops and empty/null op-ids in LogicalLink Address PR review feedback: validate in the LogicalLink primary constructor that fromOpId / toOpId are non-null, non-empty, and distinct. Both construction paths (primary case-class + secondary @JsonCreator) now reject the three classes the reviewer flagged: - self-loop (fromOpId == toOpId) - null OperatorIdentity or OperatorIdentity wrapping null id - OperatorIdentity wrapping the empty string LogicalLinkSpec now asserts the new rejections instead of pinning them as "intentionally permissive" characterization tests. The harder writeValueAsString / readValue asymmetry is tracked separately in #5042; the existing characterization test now links the issue. --- .../apache/texera/workflow/LogicalLink.scala | 13 ++ .../texera/workflow/LogicalLinkSpec.scala | 137 ++++++++++++------ 2 files changed, 107 insertions(+), 43 deletions(-) diff --git a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala b/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala index 5bbc9164b72..e6553e3cdf1 100644 --- a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala +++ b/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala @@ -29,6 +29,19 @@ case class LogicalLink( @JsonProperty("toOpId") toOpId: OperatorIdentity, toPortId: PortIdentity ) { + require( + fromOpId != null && fromOpId.id != null && fromOpId.id.nonEmpty, + "LogicalLink fromOpId must be non-null and non-empty" + ) + require( + toOpId != null && toOpId.id != null && toOpId.id.nonEmpty, + "LogicalLink toOpId must be non-null and non-empty" + ) + require( + fromOpId != toOpId, + s"LogicalLink self-loop not allowed: fromOpId == toOpId == ${fromOpId.id}" + ) + @JsonCreator def this( @JsonProperty("fromOpId") fromOpId: String, diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index b81f1a89fad..bd56aa7d5f6 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -20,7 +20,7 @@ package org.apache.texera.workflow import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.exc.MismatchedInputException +import com.fasterxml.jackson.databind.exc.{MismatchedInputException, ValueInstantiationException} import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity import org.apache.texera.amber.util.JSONUtils.objectMapper @@ -78,18 +78,56 @@ class LogicalLinkSpec extends AnyFlatSpec { assert(a != b) } - it should "consider a self-loop link well-formed (same fromOpId / toOpId, distinct ports)" in { - // Self-loops aren't structurally invalid at the LogicalLink level — - // higher layers reject cycles, but the data type allows fromOpId == - // toOpId. Pin so a future == check on construction breaks this on - // purpose. - val selfLoop = LogicalLink( - OperatorIdentity("op-A"), - PortIdentity(0), - OperatorIdentity("op-A"), - PortIdentity(1) - ) - assert(selfLoop.fromOpId == selfLoop.toOpId) + it should "reject a self-loop link (fromOpId == toOpId) regardless of port" in { + // The constructor rejects fromOpId == toOpId — a workflow edge whose + // source and sink are the same operator can never be schedulable, so + // we fail fast here rather than letting it travel through the planner. + val ex = intercept[IllegalArgumentException] { + LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity("op-A"), + PortIdentity(1) + ) + } + assert(ex.getMessage.contains("self-loop")) + } + + it should "reject a null fromOpId / toOpId in the primary constructor" in { + intercept[IllegalArgumentException] { + LogicalLink(null, PortIdentity(0), OperatorIdentity("op-B"), PortIdentity(1)) + } + intercept[IllegalArgumentException] { + LogicalLink(OperatorIdentity("op-A"), PortIdentity(0), null, PortIdentity(1)) + } + } + + it should "reject an OperatorIdentity wrapping a null id in the primary constructor" in { + intercept[IllegalArgumentException] { + LogicalLink( + OperatorIdentity(null), + PortIdentity(0), + OperatorIdentity("op-B"), + PortIdentity(1) + ) + } + intercept[IllegalArgumentException] { + LogicalLink( + OperatorIdentity("op-A"), + PortIdentity(0), + OperatorIdentity(null), + PortIdentity(1) + ) + } + } + + it should "reject an OperatorIdentity wrapping an empty id in the primary constructor" in { + intercept[IllegalArgumentException] { + LogicalLink(OperatorIdentity(""), PortIdentity(0), OperatorIdentity("op-B"), PortIdentity(1)) + } + intercept[IllegalArgumentException] { + LogicalLink(OperatorIdentity("op-A"), PortIdentity(0), OperatorIdentity(""), PortIdentity(1)) + } } // --------------------------------------------------------------------------- @@ -122,13 +160,29 @@ class LogicalLinkSpec extends AnyFlatSpec { assert(link.toOpId == OperatorIdentity("my.op-2")) } - it should "accept the empty string as an op id (no validation in the data type)" in { - // Pin: the secondary constructor does not validate; an empty string - // wraps into `OperatorIdentity("")`. A future change adding non-empty - // validation should fail this test on purpose. - val link = new LogicalLink("", PortIdentity(0), "", PortIdentity(1)) - assert(link.fromOpId == OperatorIdentity("")) - assert(link.toOpId == OperatorIdentity("")) + it should "reject the empty string as an op id via the @JsonCreator constructor" in { + intercept[IllegalArgumentException] { + new LogicalLink("", PortIdentity(0), "op-B", PortIdentity(1)) + } + intercept[IllegalArgumentException] { + new LogicalLink("op-A", PortIdentity(0), "", PortIdentity(1)) + } + } + + it should "reject a null string op id via the @JsonCreator constructor" in { + intercept[IllegalArgumentException] { + new LogicalLink(null: String, PortIdentity(0), "op-B", PortIdentity(1)) + } + intercept[IllegalArgumentException] { + new LogicalLink("op-A", PortIdentity(0), null: String, PortIdentity(1)) + } + } + + it should "reject a self-loop via the @JsonCreator constructor (same string op id)" in { + val ex = intercept[IllegalArgumentException] { + new LogicalLink("op-A", PortIdentity(0), "op-A", PortIdentity(1)) + } + assert(ex.getMessage.contains("self-loop")) } // --------------------------------------------------------------------------- @@ -192,15 +246,16 @@ class LogicalLinkSpec extends AnyFlatSpec { } it should "NOT round-trip through writeValueAsString (the @JsonCreator string overload is incompatible with the object-shape OperatorIdentity that writeValueAsString emits)" in { - // This is a real asymmetry worth pinning: production reads user-saved - // workflow JSON where `fromOpId`/`toOpId` are plain strings, but - // `objectMapper.writeValueAsString` writes OperatorIdentity as - // `{"id":"op-A"}` (the case-class object form). Re-reading that - // emitted JSON fails because Jackson dispatches on the @JsonCreator - // string-overload, which can't accept an object for fromOpId. - // A future fix that adds a third @JsonCreator (object overload) or - // a custom @JsonDeserialize on `fromOpId`/`toOpId` will turn this - // into a passing round-trip and break this assertion on purpose. + // Characterization of a real asymmetry tracked by + // https://github.com/apache/texera/issues/5042. Production reads + // user-saved workflow JSON where `fromOpId`/`toOpId` are plain + // strings, but `objectMapper.writeValueAsString` writes + // OperatorIdentity as `{"id":"op-A"}` (the case-class object form). + // Re-reading the emitted JSON fails because Jackson dispatches on the + // @JsonCreator string overload, which can't accept an object for + // fromOpId. When the issue is fixed (additional @JsonCreator object + // overload or a custom @JsonDeserialize), this test must flip to a + // passing round-trip assertion alongside the fix. val original = LogicalLink( OperatorIdentity("op-A"), PortIdentity(0), @@ -221,20 +276,16 @@ class LogicalLinkSpec extends AnyFlatSpec { } } - it should "silently default missing string op-id fields to OperatorIdentity(null) (no validation in the @JsonCreator path)" in { - // Pin: omitting `fromOpId` / `toOpId` does not throw — Jackson invokes - // the @JsonCreator with `null` for the missing String args, and - // `OperatorIdentity(null)` is a perfectly constructable case-class - // instance. A future change adding a non-null check at construction - // time should fail this on purpose. + it should "reject missing string op-id fields when deserializing via Jackson" in { + // When `fromOpId` / `toOpId` are omitted, Jackson invokes the + // @JsonCreator with `null` for the missing String args. The primary + // constructor's `require` on non-null/non-empty ids then throws, and + // Jackson wraps it in `ValueInstantiationException` with the original + // `IllegalArgumentException` as the cause. val empty = objectMapper.createObjectNode() - val link = objectMapper.treeToValue(empty, classOf[LogicalLink]) - assert(link.fromOpId.id == null) - assert(link.toOpId.id == null) - // PortIdentity fields default to null too (the @JsonCreator does not - // supply scalapb defaults for missing object fields — Jackson passes - // null for the typed PortIdentity arg). - assert(link.fromPortId == null) - assert(link.toPortId == null) + val ex = intercept[ValueInstantiationException] { + objectMapper.treeToValue(empty, classOf[LogicalLink]) + } + assert(ex.getCause.isInstanceOf[IllegalArgumentException]) } }