This is an automated email from the ASF dual-hosted git repository.

robertlazarski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/axis-axis2-java-core.git
commit 33fc4e9932b8ef39776d25a27470ed473d1b6292 Author: Robert Lazarski <[email protected]> AuthorDate: Tue Apr 7 07:41:58 2026 -1000 D3 gen_mcp_schema.py: fix two struct-parsing bugs Bug 1 — comment brace stops struct regex prematurely: _STRUCT_RE uses [^}]+ which stops at the first '}' character. A struct body comment like '* Defaults: {0.01, 0.05}' contains a '}' that terminates the capture before the actual struct closing brace, causing the struct to be silently dropped from the parsed output. Fix: strip comments from the full header_text before running _STRUCT_RE (not just from the body after the fact). This recovers structs like finbench_monte_carlo_request_t which had exactly this pattern. Bug 2 — camelCase operation names not matched to snake_case struct names: find_request_struct tried '{prefix}{op_name}_request_t' literally, so 'portfolioVariance' never matched 'finbench_portfolio_variance_request_t'. Fix: add _camel_to_snake() helper (regex: insert '_' before each uppercase that follows a lowercase/digit, then lowercase) and extend the candidate list to include the snake_case variants at every prefix level. After both fixes, running the tool against financial_benchmark_service.h correctly parses 8 structs (was 7) and matches portfolioVariance and monteCarlo to their request structs. scenarioAnalysis remains a SKIP because finbench_scenario_request_t uses an abbreviated name — its hand-crafted services.xml schema is intentionally richer and kept. Co-Authored-By: Claude Sonnet 4.6 <[email protected]> --- tools/gen_mcp_schema.py | 52 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/tools/gen_mcp_schema.py b/tools/gen_mcp_schema.py index 72e4158946..7cf234fb06 100644 --- a/tools/gen_mcp_schema.py +++ b/tools/gen_mcp_schema.py @@ -124,22 +124,27 @@ def parse_structs(header_text: str) -> dict[str, dict]: Return {struct_name: {field_name: {"c_type": ..., "has_default": bool}}}. 
Only parses typedef struct { ... } name_t; blocks. - Block and line comments are stripped from the body before field parsing - so that comment text containing ';' is not matched as a field. + Block and line comments are stripped from the FULL header text before the + struct regex runs so that a comment containing a '}' character (e.g. + ``* Defaults: {0.01, 0.05}``) does not prematurely terminate the + [^}]+ body capture and cause the struct to be missed entirely. """ structs = {} - for m in _STRUCT_RE.finditer(header_text): + for m in _STRUCT_RE.finditer(_strip_comments(header_text)): body = m.group(1) name = m.group(2) # Warn about potential nested struct/union — body regex stops at first '}' # so any nested block would already be truncated, but alert the user. + # (Comments are already stripped from header_text before the struct regex + # runs, so braces inside comments will not appear here.) if '{' in body: print(f" WARNING: struct '{name}' body contains '{{' — nested struct/union " f"members are not supported and may be missing from the schema.", file=sys.stderr) - # Strip comments before field parsing (F23 fix) + # Comments were stripped from header_text before _STRUCT_RE ran; + # strip again defensively in case body was extracted differently. clean_body = _strip_comments(body) fields = {} @@ -204,32 +209,53 @@ def build_json_schema(struct_fields: dict) -> dict: # --------------------------------------------------------------------------- # services.xml patcher # --------------------------------------------------------------------------- +def _camel_to_snake(name: str) -> str: + """Convert camelCase / PascalCase to snake_case. + + Examples: + portfolioVariance → portfolio_variance + monteCarlo → monte_carlo + scenarioAnalysis → scenario_analysis + generateTestData → generate_test_data + """ + # Insert underscore before each uppercase letter that follows a lowercase + # letter or digit, then lowercase everything. 
+ result = re.sub(r'(?<=[a-z0-9])([A-Z])', r'_\1', name) + return result.lower() + + def find_request_struct(structs: dict, op_name: str, prefix: str = "") -> str | None: """ Heuristically find the request struct for an operation name. Tries (in order): - {prefix}{op_name}_request_t - {op_name}_request_t - {op_name}_req_t - Falls back to a case-insensitive substring search on all struct names. + 1. {prefix}{op_name}_request_t (as-is) + 2. {prefix}{snake(op_name)}_request_t (camelCase → snake_case) + 3. {op_name}_request_t / {op_name}_req_t (no prefix, as-is) + 4. {snake(op_name)}_request_t (no prefix, snake_case) + 5. Case-insensitive substring search on all struct names. """ + snake = _camel_to_snake(op_name) candidates = [] if prefix: candidates.append(f"{prefix}{op_name}_request_t") + if snake != op_name: + candidates.append(f"{prefix}{snake}_request_t") candidates += [ f"{op_name}_request_t", f"{op_name}_req_t", ] + if snake != op_name: + candidates.append(f"{snake}_request_t") for c in candidates: if c in structs: return c - # Case-insensitive fallback - op_lower = op_name.lower() - for sname in structs: - if op_lower in sname.lower() and "request" in sname.lower(): - return sname + # Case-insensitive fallback — check both original and snake_case op name + for op_lower in (op_name.lower(), snake): + for sname in structs: + if op_lower in sname.lower() and "request" in sname.lower(): + return sname return None
