Xiao-zhen-Liu commented on code in PR #5141:
URL: https://github.com/apache/texera/pull/5141#discussion_r3344578847


##########
common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonLexerUtils.scala:
##########
@@ -37,6 +37,64 @@ object PythonLexerUtils {
     if (lastNewlineIndex >= 0) s.substring(lastNewlineIndex + 1) else s
   }
 
+  /**
+    * Update triple-quoted-string state after scanning one physical Python 
source line.
+    *
+    * This is intentionally lightweight. It only tracks whether scanning is 
inside a `'''` or `"""` string so callers
+    * that reason about indentation can avoid treating string contents as real 
Python statements.
+    */
+  def updateTripleQuotedStringState(

Review Comment:
   Nice helper — covers the case I had in mind cleanly. A couple of edge cases 
this intentionally doesn't model that would be worth a sentence in the 
docstring so future maintainers don't get caught:
   
   1. Escapes inside triple-quoted strings — `\"""` in a `"""…"""` block can 
prematurely close, since the inside-delimiter arm just looks for a literal 
match.
   2. Adjacency like `"abc""""""rest"""` — a `"""` right after a closing `"` is 
picked up regardless of the surrounding single-quote state.
   
   Both are unlikely in real UDFs, but listing them as known limitations would 
set the right expectation.



##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/udf/python/PythonUdfUiParameterInjectorSpec.scala:
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.texera.amber.operator.udf.python
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class PythonUdfUiParameterInjectorSpec extends AnyFlatSpec with Matchers {
+
+  private def uiParameter(
+      attributeName: String,
+      attributeType: AttributeType,
+      value: String
+  ): UiUDFParameter = {
+    val parameter = new UiUDFParameter
+    parameter.attribute = new Attribute(attributeName, attributeType)
+    parameter.value = value
+    parameter
+  }
+
+  private def inject(parameters: UiUDFParameter*): String =
+    PythonUdfUiParameterInjector.inject(baseUdfCode, parameters.toList)
+
+  private def inject(code: String, parameters: UiUDFParameter*): String =
+    PythonUdfUiParameterInjector.inject(code, parameters.toList)
+
+  private def decoderCallCount(code: String): Int =
+    code.sliding("self.decode_python_template".length).count(_ == 
"self.decode_python_template")
+
+  private val baseUdfCode: String =
+    """from pytexera import *
+      |
+      |class ProcessTupleOperator(UDFOperatorV2):
+      |    @overrides
+      |    def open(self):
+      |        print("open")
+      |
+      |    @overrides
+      |    def process_tuple(self, tuple_: Tuple, port: int):
+      |        yield tuple_
+      |""".stripMargin
+
+  it should "return user code unchanged when there are no UI parameters" in {
+    val injectedCode = inject()
+
+    injectedCode should include("class ProcessTupleOperator(UDFOperatorV2):")
+    injectedCode should include("""print("open")""")
+    injectedCode should not include ("_texera_injected_ui_parameters")
+    injectedCode should not include ("self.decode_python_template")
+    injectedCode should not include ("import typing")
+  }
+
+  it should "preserve user source lines that look like Scala stripMargin 
input" in {
+    val udfCodeWithPipeLine =
+      """from pytexera import *
+        |
+        |class ProcessTupleOperator(UDFOperatorV2):
+        |    def open(self):
+        |        pattern = "keep"
+        |        text = '''
+        |    |do not strip this line
+        |'''
+        |
+        |    def process_tuple(self, tuple_: Tuple, port: int):
+        |        yield tuple_
+        |""".stripMargin
+
+    val injectedCode = inject(udfCodeWithPipeLine, uiParameter("k", 
AttributeType.STRING, "v"))
+
+    injectedCode should include("    |do not strip this line")
+    injectedCode should include("def _texera_injected_ui_parameters(self) -> 
Dict[str, Any]:")
+  }
+
+  it should "inject UI parameter hook into supported UDF class using Dict and 
Any from pytexera" in {
+    val injectedCode = inject(uiParameter("date", AttributeType.TIMESTAMP, 
"2024-01-01T00:00:00Z"))
+
+    injectedCode should include("class ProcessTupleOperator(UDFOperatorV2):")
+    injectedCode should include(
+      "# Follow-up runtime support exports Dict/Any and defines the base hook 
that @overrides targets."
+    )
+    injectedCode should include("def _texera_injected_ui_parameters(self) -> 
Dict[str, Any]:")
+    injectedCode should include("return {")
+    injectedCode should include("self.decode_python_template")
+    decoderCallCount(injectedCode) shouldBe 2
+    injectedCode should include("""print("open")""")
+    injectedCode should not include ("import typing")
+    injectedCode should not include ("typing.Dict")
+    injectedCode should not include ("typing.Any")
+  }
+
+  it should "append the reserved hook inside the class before the next 
top-level statement" in {
+    val udfCodeWithSiblingDefinition =
+      """from pytexera import *
+        |
+        |class ProcessTupleOperator(UDFOperatorV2):
+        |    @overrides
+        |    def open(self):
+        |        print("open")
+        |
+        |    @overrides
+        |    def process_tuple(self, tuple_: Tuple, port: int):
+        |        yield tuple_
+        |
+        |def helper():
+        |    return "outside"
+        |""".stripMargin
+
+    val injectedCode =
+      inject(udfCodeWithSiblingDefinition, uiParameter("k", 
AttributeType.STRING, "v"))
+
+    val hookIndex = injectedCode.indexOf("def 
_texera_injected_ui_parameters(self)")
+    val processTupleIndex =
+      injectedCode.indexOf("def process_tuple(self, tuple_: Tuple, port: 
int):")
+    val helperIndex = injectedCode.indexOf("def helper():")
+
+    hookIndex should be >= 0
+    processTupleIndex should be < hookIndex
+    helperIndex should be > hookIndex
+  }
+
+  it should "append the reserved hook after triple-quoted strings that contain 
top-level-looking lines" in {
+    val udfCodeWithTripleQuotedString =
+      """from pytexera import *
+        |
+        |class ProcessTupleOperator(UDFOperatorV2):
+        |    def process_tuple(self, tuple_: Tuple, port: int):
+        |        sql = '''
+        |SELECT * FROM t
+        |'''
+        |        yield tuple_
+        |
+        |def helper():
+        |    return "outside"
+        |""".stripMargin
+
+    val injectedCode =
+      inject(udfCodeWithTripleQuotedString, uiParameter("k", 
AttributeType.STRING, "v"))
+
+    val hookIndex = injectedCode.indexOf("def 
_texera_injected_ui_parameters(self)")
+    val stringEndIndex = injectedCode.indexOf("'''\n        yield tuple_")
+    val helperIndex = injectedCode.indexOf("def helper():")
+
+    stringEndIndex should be >= 0
+    stringEndIndex should be < hookIndex
+    hookIndex should be < helperIndex
+  }
+
+  it should "preserve multiple UI parameters in the injected map" in {
+    val injectedCode = inject(
+      uiParameter("param1", AttributeType.DOUBLE, "12.5"),
+      uiParameter("param2", AttributeType.INTEGER, "1"),
+      uiParameter("param3", AttributeType.STRING, "Hola"),
+      uiParameter("param4", AttributeType.TIMESTAMP, "2026-02-28T03:15:00Z")
+    )
+
+    injectedCode should include("def _texera_injected_ui_parameters(self) -> 
Dict[str, Any]:")
+    injectedCode should include("self.decode_python_template")
+    decoderCallCount(injectedCode) shouldBe 8
+    injectedCode should not include ("import typing")
+  }
+
+  it should "throw when a parameter attribute is missing" in {
+    val invalidParameter = new UiUDFParameter
+    invalidParameter.attribute = null
+    invalidParameter.value = "anything"
+
+    val exception = the[RuntimeException] thrownBy {
+      inject(invalidParameter)
+    }
+
+    exception.getMessage should include("UiParameter attribute is required")
+  }
+
+  it should "throw when a UI parameter name is duplicated" in {
+    val exception = the[RuntimeException] thrownBy {
+      inject(
+        uiParameter("date", AttributeType.STRING, "2024-01-01"),
+        uiParameter("date", AttributeType.TIMESTAMP, "2024-01-01T00:00:00Z")
+      )
+    }
+
+    exception.getMessage should include("UiParameter name 'date' is declared 
more than once")
+  }
+
+  Seq(AttributeType.BINARY, AttributeType.LARGE_BINARY).foreach { 
unsupportedType =>
+    it should s"throw when a UI parameter uses ${unsupportedType.name()} type" 
in {
+      val exception = the[RuntimeException] thrownBy {
+        inject(uiParameter("payload", unsupportedType, "68656c6c6f"))
+      }
+
+      exception.getMessage should include(
+        s"UiParameter type '${unsupportedType.name()}' is not supported"
+      )
+    }
+  }
+
+  it should "throw when the reserved hook is already defined by the user" in {
+    val udfWithReservedHook =
+      """from pytexera import *
+        |
+        |class ProcessTupleOperator(UDFOperatorV2):
+        |    def _texera_injected_ui_parameters(self):
+        |        return {}
+        |
+        |    def open(self):
+        |        pass
+        |""".stripMargin
+
+    val exception = the[RuntimeException] thrownBy {
+      inject(udfWithReservedHook, uiParameter("k", AttributeType.STRING, "v"))
+    }
+
+    exception.getMessage should include(
+      "Reserved method '_texera_injected_ui_parameters' is already defined"
+    )
+  }
+
+  it should "throw when UI parameters are provided but no supported user class 
is present" in {

Review Comment:
   Small coverage gap now that the silent passthrough is gone: the "no 
parameters AND unsupported class" path is implicit (handled by the 
`parameters.isEmpty` early return in `inject`). One quick test asserting 
`inject(nonSupportedCode)` returns the code unchanged would lock that contract 
in alongside this one.



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/PythonUdfUiParameterInjector.scala:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.texera.amber.operator.udf.python
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType}
+import 
org.apache.texera.amber.pybuilder.PythonLexerUtils.updateTripleQuotedStringState
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
+import 
org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+
+import scala.util.matching.Regex
+
+/**
+  * Injects the reserved UI-parameter hook into user-written Python UDF code.
+  *
+  * Operator descriptors should call this after loading saved 
[[UiUDFParameter]] values and before sending Python source
+  * to runtime execution. The injected hook returns decoded parameter names 
and values that Python runtime support reads
+  * before the user's `open()` method runs.
+  */
+object PythonUdfUiParameterInjector {
+
+  private val InjectedUiParametersHookMethodName = 
"_texera_injected_ui_parameters"
+  private val InjectedUiParametersHookMethodHeader =
+    s"def $InjectedUiParametersHookMethodName(self) -> Dict[str, Any]:"
+  private val UnsupportedUiParameterTypes = Set(AttributeType.BINARY, 
AttributeType.LARGE_BINARY)
+
+  // Keep supported user-facing UDF class names in sync with the frontend 
parser.
+  private val SupportedPythonUdfClassHeaderRegex: Regex =
+    """(?m)^([ 
\t]*)class\s+(ProcessTupleOperator|ProcessBatchOperator|ProcessTableOperator|GenerateOperator)\s*\([^)]*\)\s*:\s*(?:#.*)?$""".r
+
+  private def validate(uiParameters: List[UiUDFParameter]): Unit = {
+    val attributes = uiParameters.map(parameterAttribute)
+    attributes.foreach(validateSupportedType)
+
+    attributes
+      .groupBy(_.getName)
+      .collectFirst {
+        case (parameterName, matchingAttributes) if matchingAttributes.size > 
1 => parameterName
+      }
+      .foreach { duplicateName =>
+        throw new RuntimeException(s"UiParameter name '$duplicateName' is 
declared more than once.")
+      }
+  }
+
+  private def parameterAttribute(parameter: UiUDFParameter): Attribute =
+    Option(parameter).flatMap(parameter => 
Option(parameter.attribute)).getOrElse {
+      throw new RuntimeException("UiParameter attribute is required.")
+    }
+
+  private def validateSupportedType(attribute: Attribute): Unit = {
+    if (UnsupportedUiParameterTypes.contains(attribute.getType)) {
+      throw new RuntimeException(
+        s"UiParameter type '${attribute.getType.name()}' is not supported. " +
+          "Use string, integer, long, double, boolean, or timestamp instead."
+      )
+    }
+  }
+
+  private def buildInjectedParameterEntry(parameter: UiUDFParameter): 
PythonTemplateBuilder = {
+    pyb"${parameter.attribute.getName}: ${parameter.value}"
+  }
+
+  private def buildInjectedParametersMap(
+      uiParameters: List[UiUDFParameter]
+  ): PythonTemplateBuilder = {
+    val entries = uiParameters.map(buildInjectedParameterEntry)
+    entries.reduceOption((acc, entry) => acc + pyb", " + 
entry).getOrElse(pyb"")
+  }
+
+  private def buildInjectedHookMethod(uiParameters: List[UiUDFParameter]): 
String = {
+    val injectedParametersMap = buildInjectedParametersMap(uiParameters)
+
+    (pyb"""|# Follow-up runtime support exports Dict/Any and defines the base 
hook that @overrides targets.

Review Comment:
   Once PR #3 lands, this comment turns into a description of an 
already-satisfied dependency rather than a follow-up, and the test below pins 
the exact wording. Two ways to keep it from drifting: reword to something 
timeless (e.g. `# Requires Dict/Any export and the base hook from the Python 
runtime`), or plan to remove the comment and the matching assertion when the 
runtime PR merges. Either is fine — just want to make sure it doesn't get left 
behind.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to