This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5545-90322602101d498e7af273ebfe14dcb5f7501566 in repository https://gitbox.apache.org/repos/asf/texera.git
commit ed22158534c1ae503fea9b4338a03862b5231c97 Author: Matthew B. <[email protected]> AuthorDate: Sun Jun 21 16:48:37 2026 -0700 refactor(config): unify default-data-transfer-batch-size into one config key (#5545) ### What changes were proposed in this PR? The default data-transfer batch size was controlled by **two** separate config keys, each with its own env var, both defaulting to `400`: - `network-buffering.default-data-transfer-batch-size` (`NETWORK_BUFFERING_DEFAULT_DATA_TRANSFER_BATCH_SIZE`), read by the backend (`ApplicationConfig.defaultDataTransferBatchSize`). - `gui.workflow-workspace.default-data-transfer-batch-size` (`GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE`), read by `GuiConfig` and sent to the frontend via `ConfigResource`. Having two keys for one value meant an operator could set one env var and forget the other, leaving the backend and GUI out of sync. This PR makes the `network-buffering` key the single source of truth: - `ConfigResource` now surfaces `ApplicationConfig.defaultDataTransferBatchSize` directly. - Removed the duplicate `guiWorkflowWorkspaceDefaultDataTransferBatchSize` field from `GuiConfig`. - Removed the duplicate key (and its env override) from `gui.conf`. The frontend is unchanged: the JSON field name (`defaultDataTransferBatchSize`) it consumes stays the same. ### Any related issues, documentation, discussions? Closes #5544 ### How was this PR tested? - `sbt Config/compile ConfigService/compile` both succeed. - Repo-wide grep confirms no remaining references to the old key or `GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE` (outside generated `dist/` artifacts). - The backend reader of the canonical key is untouched, so existing tests (e.g. `NetworkOutputBufferSpec`) are unaffected. - Also tested that the application performs as normal on all services (e.g., running workflows, uploading data, creating workflows, etc..) ### Was this PR authored or co-authored using generative AI tooling? Generated-by: Claude Code (Opus 4.8) --- common/config/src/main/resources/gui.conf | 4 --- .../apache/texera/common/config/GuiConfig.scala | 2 -- .../texera/service/resource/ConfigResource.scala | 11 ++++++- .../service/resource/ConfigResourceAuthSpec.scala | 34 ++++++++++++++++++++++ .../app/common/service/gui-config.service.spec.ts | 34 ++++++++++++++++------ .../src/app/common/service/gui-config.service.ts | 11 ++++--- 6 files changed, 76 insertions(+), 20 deletions(-) diff --git a/common/config/src/main/resources/gui.conf b/common/config/src/main/resources/gui.conf index a0673c9a3f..aa7e4ff541 100644 --- a/common/config/src/main/resources/gui.conf +++ b/common/config/src/main/resources/gui.conf @@ -59,10 +59,6 @@ gui { auto-attribute-correction-enabled = true auto-attribute-correction-enabled = ${?GUI_WORKFLOW_WORKSPACE_AUTO_ATTRIBUTE_CORRECTION_ENABLED} - # default data transfer batch size for workflows - default-data-transfer-batch-size = 400 - default-data-transfer-batch-size = ${?GUI_WORKFLOW_WORKSPACE_DEFAULT_DATA_TRANSFER_BATCH_SIZE} - # default execution mode for workflows, can be either MATERIALIZED or PIPELINED default-execution-mode = PIPELINED default-execution-mode = ${?GUI_WORKFLOW_WORKSPACE_DEFAULT_EXECUTION_MODE} diff --git a/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala b/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala index 3b378de127..65406b7e74 100644 --- a/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala +++ b/common/config/src/main/scala/org/apache/texera/common/config/GuiConfig.scala @@ -45,8 +45,6 @@ object GuiConfig { conf.getBoolean("gui.workflow-workspace.export-execution-result-enabled") val guiWorkflowWorkspaceAutoAttributeCorrectionEnabled: Boolean = conf.getBoolean("gui.workflow-workspace.auto-attribute-correction-enabled") - val guiWorkflowWorkspaceDefaultDataTransferBatchSize: Int = - conf.getInt("gui.workflow-workspace.default-data-transfer-batch-size") val guiWorkflowWorkspaceDefaultExecutionMode: String = conf.getString("gui.workflow-workspace.default-execution-mode") val guiWorkflowWorkspaceSelectingFilesFromDatasetsEnabled: Boolean = diff --git a/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala b/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala index 55dc386a3e..e80e2383e9 100644 --- a/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala +++ b/config-service/src/main/scala/org/apache/texera/service/resource/ConfigResource.scala @@ -23,6 +23,7 @@ import jakarta.annotation.security.{PermitAll, RolesAllowed} import jakarta.ws.rs.core.MediaType import jakarta.ws.rs.{GET, Path, Produces} import org.apache.texera.common.config.{ + ApplicationConfig, AuthConfig, ComputingUnitConfig, GuiConfig, @@ -67,7 +68,6 @@ class ConfigResource { "asyncRenderingEnabled" -> GuiConfig.guiWorkflowWorkspaceAsyncRenderingEnabled, "timetravelEnabled" -> GuiConfig.guiWorkflowWorkspaceTimetravelEnabled, "productionSharedEditingServer" -> GuiConfig.guiWorkflowWorkspaceProductionSharedEditingServer, - "defaultDataTransferBatchSize" -> GuiConfig.guiWorkflowWorkspaceDefaultDataTransferBatchSize, "defaultExecutionMode" -> GuiConfig.guiWorkflowWorkspaceDefaultExecutionMode, "workflowEmailNotificationEnabled" -> GuiConfig.guiWorkflowWorkspaceWorkflowEmailNotificationEnabled, "sharingComputingUnitEnabled" -> ComputingUnitConfig.sharingComputingUnitEnabled, @@ -81,6 +81,15 @@ class ConfigResource { "expirationTimeInMinutes" -> AuthConfig.jwtExpirationMinutes ) + // Engine configs. + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/amber") + def getAmberConfig: Map[String, Any] = + Map( + "defaultDataTransferBatchSize" -> ApplicationConfig.defaultDataTransferBatchSize + ) + @GET @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/user-system") diff --git a/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala b/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala index d5418ea0f7..2f9f5fb25c 100644 --- a/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala +++ b/config-service/src/test/scala/org/apache/texera/service/resource/ConfigResourceAuthSpec.scala @@ -153,6 +153,40 @@ class ConfigResourceAuthSpec extends AnyFlatSpec with Matchers with BeforeAndAft response.getStatus shouldBe 200 } + "GET /config/amber" should "return 401 with a Bearer challenge without an Authorization header" in { + val response = + resources.target("/config/amber").request(MediaType.APPLICATION_JSON).get() + response.getStatus shouldBe 401 + response.getHeaderString("WWW-Authenticate") shouldBe JwtAuthFilter.BearerChallenge + } + + it should "return 200 with a valid Bearer token whose role matches @RolesAllowed" in { + val response = resources + .target("/config/amber") + .request(MediaType.APPLICATION_JSON) + .header("Authorization", s"Bearer ${regularToken()}") + .get() + response.getStatus shouldBe 200 + } + + it should "expose the engine config separated from the gui payload" in { + // The endpoint exists to keep engine configs out of /config/gui (see PR #5545). + // Pin that defaultDataTransferBatchSize is served here and not folded back into gui. + val amberPayload = resources + .target("/config/amber") + .request(MediaType.APPLICATION_JSON) + .header("Authorization", s"Bearer ${regularToken()}") + .get(classOf[Map[String, Any]]) + amberPayload.keySet should contain("defaultDataTransferBatchSize") + + val guiPayload = resources + .target("/config/gui") + .request(MediaType.APPLICATION_JSON) + .header("Authorization", s"Bearer ${regularToken()}") + .get(classOf[Map[String, Any]]) + guiPayload.keySet should not contain "defaultDataTransferBatchSize" + } + "GET an @RolesAllowed probe endpoint" should "return 401 without an Authorization header" in { // Sanity: JwtAuthFilter is now eager — missing Authorization is rejected // by the filter itself with a 401 + Bearer challenge, before diff --git a/frontend/src/app/common/service/gui-config.service.spec.ts b/frontend/src/app/common/service/gui-config.service.spec.ts index 0294ea8307..92a0d5a615 100644 --- a/frontend/src/app/common/service/gui-config.service.spec.ts +++ b/frontend/src/app/common/service/gui-config.service.spec.ts @@ -40,6 +40,10 @@ const GUI_PAYLOAD = { defaultExecutionMode: "PIPELINED", }; +const AMBER_PAYLOAD = { + defaultDataTransferBatchSize: 400, +}; + const USER_SYSTEM_PAYLOAD = { inviteOnly: true, }; @@ -91,20 +95,24 @@ describe("GuiConfigService", () => { // ─── loadPostLogin ──────────────────────────────────────────────────────── - it("loadPostLogin fetches /config/gui and /config/user-system in parallel and merges", async () => { + it("loadPostLogin fetches /config/gui, /config/amber and /config/user-system in parallel and merges", async () => { const pending = firstValueFrom(service.loadPostLogin()); const gui = http.expectOne(`${API}/config/gui`); + const amber = http.expectOne(`${API}/config/amber`); const userSystem = http.expectOne(`${API}/config/user-system`); expect(gui.request.method).toBe("GET"); + expect(amber.request.method).toBe("GET"); expect(userSystem.request.method).toBe("GET"); gui.flush(GUI_PAYLOAD); + amber.flush(AMBER_PAYLOAD); userSystem.flush(USER_SYSTEM_PAYLOAD); await pending; expect(service.env.copilotEnabled).toBe(true); expect(service.env.limitColumns).toBe(42); + expect(service.env.defaultDataTransferBatchSize).toBe(400); expect(service.env.inviteOnly).toBe(true); }); @@ -118,6 +126,7 @@ describe("GuiConfigService", () => { const postLoginPending = firstValueFrom(service.loadPostLogin()); http.expectOne(`${API}/config/gui`).flush(GUI_PAYLOAD); + http.expectOne(`${API}/config/amber`).flush(AMBER_PAYLOAD); http.expectOne(`${API}/config/user-system`).flush(USER_SYSTEM_PAYLOAD); await postLoginPending; @@ -132,21 +141,23 @@ describe("GuiConfigService", () => { it("load() only hits /config/pre-login when no access token is in localStorage", async () => { const pending = firstValueFrom(service.load()); http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD); - // /config/gui and /config/user-system must not be requested when anonymous; + // post-login endpoints must not be requested when anonymous; // the no-Authorization-header request would 403 and pollute network logs. http.expectNone(`${API}/config/gui`); + http.expectNone(`${API}/config/amber`); http.expectNone(`${API}/config/user-system`); await pending; expect(service.env.localLogin).toBe(true); }); - it("load() chains /config/gui + /config/user-system when a token is stored", async () => { + it("load() chains /config/gui + /config/amber + /config/user-system when a token is stored", async () => { localStorage.setItem(TOKEN_KEY, "stored-token"); const pending = firstValueFrom(service.load()); http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD); - // forkJoin fires both requests in parallel after pre-login resolves. + // forkJoin fires all requests in parallel after pre-login resolves. http.expectOne(`${API}/config/gui`).flush(GUI_PAYLOAD); + http.expectOne(`${API}/config/amber`).flush(AMBER_PAYLOAD); http.expectOne(`${API}/config/user-system`).flush(USER_SYSTEM_PAYLOAD); await pending; @@ -164,11 +175,15 @@ describe("GuiConfigService", () => { http.expectOne(`${API}/config/pre-login`).flush(PRE_LOGIN_PAYLOAD); const guiReq = http.expectOne(`${API}/config/gui`); + const amberReq = http.expectOne(`${API}/config/amber`); const userSystemReq = http.expectOne(`${API}/config/user-system`); guiReq.flush({}, { status: 403, statusText: "Forbidden" }); - // forkJoin tears down the sibling observable on first error, so the - // user-system request is already cancelled by the time we get here. - // Flushing a cancelled TestRequest throws. + // forkJoin tears down the sibling observables on first error, so the + // amber and user-system requests are already cancelled by the time we get + // here. Flushing a cancelled TestRequest throws. + if (!amberReq.cancelled) { + amberReq.flush({}, { status: 403, statusText: "Forbidden" }); + } if (!userSystemReq.cancelled) { userSystemReq.flush({}, { status: 403, statusText: "Forbidden" }); } @@ -185,9 +200,10 @@ describe("GuiConfigService", () => { localStorage.setItem(TOKEN_KEY, "stored-token"); const pending = firstValueFrom(service.load()); http.expectOne(`${API}/config/pre-login`).error(new ProgressEvent("offline")); - // /config/gui must NOT be attempted if pre-login fails — the catchError on - // the inner pipe must not catch the outer pre-login rejection. + // post-login endpoints must NOT be attempted if pre-login fails: the + // catchError on the inner pipe must not catch the outer pre-login rejection. http.expectNone(`${API}/config/gui`); + http.expectNone(`${API}/config/amber`); http.expectNone(`${API}/config/user-system`); await expect(pending).rejects.toThrow(/pre-login configuration/); }); diff --git a/frontend/src/app/common/service/gui-config.service.ts b/frontend/src/app/common/service/gui-config.service.ts index e96d98b191..28695f7d06 100644 --- a/frontend/src/app/common/service/gui-config.service.ts +++ b/frontend/src/app/common/service/gui-config.service.ts @@ -29,7 +29,9 @@ import { AppSettings } from "../app-setting"; const ACCESS_TOKEN_KEY = "access_token"; type PreLoginConfig = Pick<GuiConfig, "localLogin" | "googleLogin" | "defaultLocalUser" | "attributionEnabled">; -type GuiOnlyConfig = Omit<GuiConfig, keyof PreLoginConfig | "inviteOnly">; +// Fields served by /config/amber. +type AmberConfig = Pick<GuiConfig, "defaultDataTransferBatchSize">; +type GuiOnlyConfig = Omit<GuiConfig, keyof PreLoginConfig | keyof AmberConfig | "inviteOnly">; type UserSystemConfig = Pick<GuiConfig, "inviteOnly">; @Injectable({ providedIn: "root" }) @@ -83,10 +85,11 @@ export class GuiConfigService { */ loadPostLogin(): Observable<Partial<GuiConfig>> { const guiConfig$ = this.http.get<GuiOnlyConfig>(`${AppSettings.getApiEndpoint()}/config/gui`); + const amberConfig$ = this.http.get<AmberConfig>(`${AppSettings.getApiEndpoint()}/config/amber`); const userSystemConfig$ = this.http.get<UserSystemConfig>(`${AppSettings.getApiEndpoint()}/config/user-system`); - return forkJoin([guiConfig$, userSystemConfig$]).pipe( - tap(([guiConfig, userSystemConfig]) => { - this.config = { ...this.config, ...guiConfig, ...userSystemConfig }; + return forkJoin([guiConfig$, amberConfig$, userSystemConfig$]).pipe( + tap(([guiConfig, amberConfig, userSystemConfig]) => { + this.config = { ...this.config, ...guiConfig, ...amberConfig, ...userSystemConfig }; }), map(() => this.config) );
