From 0be5b9e7ac7b6d410e46ea956eba1e54411a2edc Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sat, 20 Jun 2026 21:47:25 -0700 Subject: [PATCH 1/6] Added static asset cache for frontend --- .../texera/web/StaticAssetCacheFilter.scala | 63 ++++++++++++++++++ .../texera/web/TexeraWebApplication.scala | 7 ++ .../web/StaticAssetCacheFilterSpec.scala | 66 +++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala create mode 100644 amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala diff --git a/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala b/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala new file mode 100644 index 00000000000..74734ecfabb --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.web + +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} +import javax.servlet.{Filter, FilterChain, FilterConfig, ServletRequest, ServletResponse} + +object StaticAssetCacheFilter { + + // Matches `name..ext` file names. + private val FingerprintedAsset = """.*\.[0-9a-f]{8,}\.[A-Za-z0-9]+""".r + + val ImmutableCacheControl = "public, max-age=31536000, immutable" + val RevalidateCacheControl = "no-cache, must-revalidate" + + // None for /api/*; immutable for fingerprinted assets; revalidate otherwise. + def cacheControlFor(path: String): Option[String] = { + if (path.startsWith("/api/")) None + else + path.substring(path.lastIndexOf('/') + 1) match { + case FingerprintedAsset() => Some(ImmutableCacheControl) + case _ => Some(RevalidateCacheControl) + } + } +} + +class StaticAssetCacheFilter extends Filter { + override def init(filterConfig: FilterConfig): Unit = {} + + override def doFilter( + request: ServletRequest, + response: ServletResponse, + chain: FilterChain + ): Unit = { + (request, response) match { + case (req: HttpServletRequest, resp: HttpServletResponse) => + StaticAssetCacheFilter + .cacheControlFor(req.getRequestURI) + .foreach(resp.setHeader("Cache-Control", _)) + case _ => + } + chain.doFilter(request, response) + } + + override def destroy(): Unit = {} +} diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala index c93f75fe75b..6e504aedc74 100644 --- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala +++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala @@ -164,6 +164,13 @@ class TexeraWebApplication AuthResource.createAdminUser() + // Set Cache-Control on static frontend asset responses. 
+ environment.getApplicationContext.addFilter( + new FilterHolder(new StaticAssetCacheFilter), + "/*", + java.util.EnumSet.allOf(classOf[javax.servlet.DispatcherType]) + ) + // Route request logs through SLF4J, controlled by TEXERA_SERVICE_LOG_LEVEL. // TODO: replace with RequestLoggingFilter.register() from common/auth once Dropwizard is upgraded to 4.x val requestLogger = org.slf4j.LoggerFactory.getLogger("org.eclipse.jetty.server.RequestLog") diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala new file mode 100644 index 00000000000..1d6bff92f1e --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.web + +import org.apache.texera.web.StaticAssetCacheFilter.{ImmutableCacheControl, RevalidateCacheControl} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { + + private def cc(path: String) = StaticAssetCacheFilter.cacheControlFor(path) + + "cacheControlFor" should "mark content-hashed JS and CSS bundles immutable" in { + cc("/main.138cf96bab6ef6d9.js") shouldBe Some(ImmutableCacheControl) + cc("/styles.266ff0ada80cd80a.css") shouldBe Some(ImmutableCacheControl) + cc("/polyfills.9d67f25b35182fa7.js") shouldBe Some(ImmutableCacheControl) + } + + it should "mark content-hashed media assets immutable" in { + cc("/assets/roboto.abcdef12.woff2") shouldBe Some(ImmutableCacheControl) + } + + it should "force revalidation of the index document so a deploy is never served stale" in { + cc("/") shouldBe Some(RevalidateCacheControl) + cc("/index.html") shouldBe Some(RevalidateCacheControl) + } + + it should "force revalidation of Angular route paths (served the index document via the 404 fallback)" in { + cc("/dashboard") shouldBe Some(RevalidateCacheControl) + cc("/dashboard/workflow/42") shouldBe Some(RevalidateCacheControl) + } + + it should "force revalidation of non-fingerprinted static files" in { + cc("/favicon.ico") shouldBe Some(RevalidateCacheControl) + cc("/assets/logo.png") shouldBe Some(RevalidateCacheControl) + cc("/3rdpartylicenses.txt") shouldBe Some(RevalidateCacheControl) + } + + it should "leave backend /api/* responses untouched" in { + cc("/api/workflow/123") shouldBe None + cc("/api/auth/login") shouldBe None + } + + it should "not mistake a short numeric segment for a content hash" in { + // "v2" / "12345" are too short to be a fingerprint; only 8+ hex chars qualify. 
+ cc("/app.v2.js") shouldBe Some(RevalidateCacheControl) + cc("/data.12345.json") shouldBe Some(RevalidateCacheControl) + } +} From c93e3bfb7403f9193c96d44a564283f87c477406 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sun, 21 Jun 2026 00:28:30 -0700 Subject: [PATCH 2/6] avoid freezing on long numeric segments --- .../texera/web/StaticAssetCacheFilter.scala | 19 ++++++++++++------- .../web/StaticAssetCacheFilterSpec.scala | 7 +++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala b/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala index 74734ecfabb..32dffbe4048 100644 --- a/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala +++ b/amber/src/main/scala/org/apache/texera/web/StaticAssetCacheFilter.scala @@ -24,20 +24,25 @@ import javax.servlet.{Filter, FilterChain, FilterConfig, ServletRequest, Servlet object StaticAssetCacheFilter { - // Matches `name..ext` file names. - private val FingerprintedAsset = """.*\.[0-9a-f]{8,}\.[A-Za-z0-9]+""".r + // `name.<hash>.ext`, capturing the hash segment. + private val FingerprintedAsset = """.*\.([0-9a-f]{8,})\.[A-Za-z0-9]+""".r val ImmutableCacheControl = "public, max-age=31536000, immutable" val RevalidateCacheControl = "no-cache, must-revalidate" + // Require a hex letter so all-numeric segments (dates, versions) aren't frozen as a hash. + private def isFingerprinted(fileName: String): Boolean = + fileName match { + case FingerprintedAsset(hash) => hash.exists(c => c >= 'a' && c <= 'f') + case _ => false + } + // None for /api/*; immutable for fingerprinted assets; revalidate otherwise. 
def cacheControlFor(path: String): Option[String] = { if (path.startsWith("/api/")) None - else - path.substring(path.lastIndexOf('/') + 1) match { - case FingerprintedAsset() => Some(ImmutableCacheControl) - case _ => Some(RevalidateCacheControl) - } + else if (isFingerprinted(path.substring(path.lastIndexOf('/') + 1))) + Some(ImmutableCacheControl) + else Some(RevalidateCacheControl) } } diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala index 1d6bff92f1e..fb3dd83c7d1 100644 --- a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala +++ b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -63,4 +63,11 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { cc("/app.v2.js") shouldBe Some(RevalidateCacheControl) cc("/data.12345.json") shouldBe Some(RevalidateCacheControl) } + + it should "not freeze long purely-numeric segments (dates, version numbers)" in { + // A real content hash contains hex letters; an all-digit segment is more likely a + // date or version stamp and must not be cached immutably for a year. 
+ cc("/report.20240101.csv") shouldBe Some(RevalidateCacheControl) + cc("/photo.20240101120000.jpg") shouldBe Some(RevalidateCacheControl) + } } From 9083e99aeb6af12402742a0b3d86c1d28c4c20f4 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sun, 21 Jun 2026 00:39:49 -0700 Subject: [PATCH 3/6] added test cases --- .../web/StaticAssetCacheFilterSpec.scala | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala index fb3dd83c7d1..35fad25f0cd 100644 --- a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala +++ b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -23,6 +23,12 @@ import org.apache.texera.web.StaticAssetCacheFilter.{ImmutableCacheControl, Reva import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers +import java.lang.reflect.{InvocationHandler, Method, Proxy} +import java.util.concurrent.atomic.AtomicBoolean +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} +import javax.servlet.{FilterChain, ServletRequest, ServletResponse} +import scala.collection.mutable + class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { private def cc(path: String) = StaticAssetCacheFilter.cacheControlFor(path) @@ -70,4 +76,93 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { cc("/report.20240101.csv") shouldBe Some(RevalidateCacheControl) cc("/photo.20240101120000.jpg") shouldBe Some(RevalidateCacheControl) } + + it should "fingerprint assets in nested directories and multi-dot chunk names" in { + cc("/assets/fonts/roboto.abcdef12.woff2") shouldBe Some(ImmutableCacheControl) + cc("/vendor.es2015.8a9b0c1d2e3f4a5b.js") shouldBe Some(ImmutableCacheControl) + } + + it should "only match lowercase hex hashes, as emitted by the Angular build" in { + cc("/main.ABCDEF1234567890.js") 
shouldBe Some(RevalidateCacheControl) + } + + it should "only exclude the /api/ prefix, not paths merely starting with 'api'" in { + cc("/api") shouldBe Some(RevalidateCacheControl) + cc("/api-docs.html") shouldBe Some(RevalidateCacheControl) + } + + // --- doFilter wiring, exercised via dependency-free dynamic-proxy doubles --- + + // A proxy that answers the handled methods and returns nulls/zeros for everything else. + private def proxy[T](cls: Class[T])(handler: PartialFunction[(String, Seq[AnyRef]), AnyRef]): T = { + val h = new InvocationHandler { + override def invoke(p: Any, m: Method, args: Array[AnyRef]): AnyRef = { + val a = if (args == null) Seq.empty[AnyRef] else args.toSeq + handler.applyOrElse((m.getName, a), (_: (String, Seq[AnyRef])) => defaultValue(m.getReturnType)) + } + } + Proxy.newProxyInstance(cls.getClassLoader, Array[Class[_]](cls), h).asInstanceOf[T] + } + + private def defaultValue(t: Class[_]): AnyRef = + if (t == java.lang.Boolean.TYPE) java.lang.Boolean.FALSE + else if (t == java.lang.Integer.TYPE) java.lang.Integer.valueOf(0) + else if (t == java.lang.Long.TYPE) java.lang.Long.valueOf(0L) + else null + + private def httpRequest(uri: String): HttpServletRequest = + proxy(classOf[HttpServletRequest]) { case ("getRequestURI", _) => uri } + + private def httpResponse(into: mutable.Map[String, String]): HttpServletResponse = + proxy(classOf[HttpServletResponse]) { + case ("setHeader", Seq(name, value)) => into.update(name.toString, value.toString); null + } + + private def recordingChain(invoked: AtomicBoolean): FilterChain = + (_: ServletRequest, _: ServletResponse) => invoked.set(true) + + "doFilter" should "set immutable Cache-Control on a fingerprinted asset and continue the chain" in { + val headers = mutable.Map.empty[String, String] + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter() + .doFilter(httpRequest("/main.138cf96bab6ef6d9.js"), httpResponse(headers), recordingChain(chained)) + 
headers.get("Cache-Control") shouldBe Some(ImmutableCacheControl) + chained.get() shouldBe true + } + + it should "set revalidate Cache-Control on a non-fingerprinted asset" in { + val headers = mutable.Map.empty[String, String] + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter() + .doFilter(httpRequest("/index.html"), httpResponse(headers), recordingChain(chained)) + headers.get("Cache-Control") shouldBe Some(RevalidateCacheControl) + chained.get() shouldBe true + } + + it should "leave /api/* responses untouched but still continue the chain" in { + val headers = mutable.Map.empty[String, String] + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter() + .doFilter(httpRequest("/api/workflow/1"), httpResponse(headers), recordingChain(chained)) + headers shouldBe empty + chained.get() shouldBe true + } + + it should "ignore non-HTTP request/response pairs but still continue the chain" in { + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter().doFilter( + proxy(classOf[ServletRequest])(PartialFunction.empty), + proxy(classOf[ServletResponse])(PartialFunction.empty), + recordingChain(chained) + ) + chained.get() shouldBe true + } + + "init and destroy" should "be no-ops that do not throw" in { + val filter = new StaticAssetCacheFilter() + noException should be thrownBy { + filter.init(null) + filter.destroy() + } + } } From fb546e537353c495b2c5f0ab866ce77688c29136 Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Sun, 21 Jun 2026 00:47:58 -0700 Subject: [PATCH 4/6] lint --- .../texera/web/StaticAssetCacheFilterSpec.scala | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala index 35fad25f0cd..5f9d17b3e05 100644 --- a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala +++ 
b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -94,11 +94,16 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { // --- doFilter wiring, exercised via dependency-free dynamic-proxy doubles --- // A proxy that answers the handled methods and returns nulls/zeros for everything else. - private def proxy[T](cls: Class[T])(handler: PartialFunction[(String, Seq[AnyRef]), AnyRef]): T = { + private def proxy[T]( + cls: Class[T] + )(handler: PartialFunction[(String, Seq[AnyRef]), AnyRef]): T = { val h = new InvocationHandler { override def invoke(p: Any, m: Method, args: Array[AnyRef]): AnyRef = { val a = if (args == null) Seq.empty[AnyRef] else args.toSeq - handler.applyOrElse((m.getName, a), (_: (String, Seq[AnyRef])) => defaultValue(m.getReturnType)) + handler.applyOrElse( + (m.getName, a), + (_: (String, Seq[AnyRef])) => defaultValue(m.getReturnType) + ) } } Proxy.newProxyInstance(cls.getClassLoader, Array[Class[_]](cls), h).asInstanceOf[T] @@ -125,7 +130,11 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { val headers = mutable.Map.empty[String, String] val chained = new AtomicBoolean(false) new StaticAssetCacheFilter() - .doFilter(httpRequest("/main.138cf96bab6ef6d9.js"), httpResponse(headers), recordingChain(chained)) + .doFilter( + httpRequest("/main.138cf96bab6ef6d9.js"), + httpResponse(headers), + recordingChain(chained) + ) headers.get("Cache-Control") shouldBe Some(ImmutableCacheControl) chained.get() shouldBe true } From 17f5d5b90a574ec46bcf9e8de7f000c38277379a Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Mon, 22 Jun 2026 04:03:37 -0700 Subject: [PATCH 5/6] added test cases --- .../web/StaticAssetCacheFilterSpec.scala | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala index 5f9d17b3e05..c74047f0a41 100644 --- 
a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala +++ b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -91,6 +91,34 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { cc("/api-docs.html") shouldBe Some(RevalidateCacheControl) } + it should "require at least eight hex characters for a fingerprint" in { + // Seven hex chars is one short of an Angular content hash and must not be frozen. + cc("/main.abcdef1.js") shouldBe Some(RevalidateCacheControl) + // Eight is the minimum that qualifies. + cc("/main.abcdef12.js") shouldBe Some(ImmutableCacheControl) + } + + it should "not freeze an all-digit segment even at fingerprint length" in { + // Eight digits is long enough for the regex but contains no hex letter, so it is + // treated as a version/date stamp rather than a content hash. + cc("/main.12345678.js") shouldBe Some(RevalidateCacheControl) + } + + it should "not treat non-hex letters as a content hash" in { + cc("/main.ghijklmn.js") shouldBe Some(RevalidateCacheControl) + cc("/main.zzzzzzzz.js") shouldBe Some(RevalidateCacheControl) + } + + it should "fingerprint assets regardless of the file extension's case" in { + cc("/main.138cf96bab6ef6d9.JS") shouldBe Some(ImmutableCacheControl) + cc("/styles.266ff0ada80cd80a.CSS") shouldBe Some(ImmutableCacheControl) + } + + it should "force revalidation of directory paths ending in a slash" in { + cc("/assets/") shouldBe Some(RevalidateCacheControl) + cc("/dashboard/") shouldBe Some(RevalidateCacheControl) + } + // --- doFilter wiring, exercised via dependency-free dynamic-proxy doubles --- // A proxy that answers the handled methods and returns nulls/zeros for everything else. 
From 74967c786e0d34b276467fc8a9a3173734a6675d Mon Sep 17 00:00:00 2001 From: Matthew Ball Date: Mon, 22 Jun 2026 11:53:33 -0700 Subject: [PATCH 6/6] added test case to ignore request or response chains if a non-HTTP value is included --- .../web/StaticAssetCacheFilterSpec.scala | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala index c74047f0a41..2c148d200a9 100644 --- a/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala +++ b/amber/src/test/scala/org/apache/texera/web/StaticAssetCacheFilterSpec.scala @@ -195,6 +195,28 @@ class StaticAssetCacheFilterSpec extends AnyFlatSpec with Matchers { chained.get() shouldBe true } + it should "ignore an HTTP request paired with a non-HTTP response but still continue the chain" in { + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter().doFilter( + httpRequest("/main.138cf96bab6ef6d9.js"), + proxy(classOf[ServletResponse])(PartialFunction.empty), + recordingChain(chained) + ) + chained.get() shouldBe true + } + + it should "ignore a non-HTTP request paired with an HTTP response but still continue the chain" in { + val headers = mutable.Map.empty[String, String] + val chained = new AtomicBoolean(false) + new StaticAssetCacheFilter().doFilter( + proxy(classOf[ServletRequest])(PartialFunction.empty), + httpResponse(headers), + recordingChain(chained) + ) + headers shouldBe empty + chained.get() shouldBe true + } + "init and destroy" should "be no-ops that do not throw" in { val filter = new StaticAssetCacheFilter() noException should be thrownBy {