This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 5338294fd feat(csharp/test/Drivers/Databricks): Enable RunAsync option
in TExecuteStatementReq (#3171)
5338294fd is described below
commit 5338294fd3e4971c438e6f03d97915ed7bdc178c
Author: Jacky Hu <[email protected]>
AuthorDate: Wed Jul 16 15:48:05 2025 -0700
feat(csharp/test/Drivers/Databricks): Enable RunAsync option in
TExecuteStatementReq (#3171)
## Motivation
Databricks adds a `RunAsync` option (default is false) for all the thrift
operations. When it is set to `true`, the operation runs async at the
backend and the status can be polled by calling getStatus; this helps the
Databricks backend better manage capacity and load on client
requests. Furthermore, it can avoid unnecessary retries on a thrift
operation (e.g. TExecuteStatementReq) when the warehouse is stopped or
unavailable: the backend returns a 503 error when `RunAsync` is false,
and the client has to retry the thrift operation until the warehouse is
up, which generates lots of queries with 503 errors in the Databricks
query history and consumes more resources. When `RunAsync=true`, the server
will return 200 with query state `PENDING`, and the client will poll the
status until the warehouse is up; this generates only one query in the
query history.
## Change
- Add a connection parameter `adbc.databricks.enable_run_async_thrift`,
default is `false` (it will be changed to `true` later)
- Set `RunAsync` of `TExecuteStatementReq` with above connection
parameter (`RunAsyncInThrift`) in
`DatabricksStatement:SetStatementProperties`
- Fix a bug in `BaseDatabricksReader` by adding `null` check on
`statement.DirectResults.ResultSet`
- Fix the case `TestVarcharExceptionDataDatabricks` in
`StringValuesTest`: when the error is in the `DirectResult.status` (the case
for `RunAsync=true`), it throws the error with `DisplayMessage` (see
[here](https://github.com/apache/arrow-adbc/blob/main/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs#L316))
instead of the `Message`, and `DisplayMessage` does not include some
internal error info such as the backend's error exception class.
## Test
- Run all the E2E tests under `csharp/test/Drivers/Databricks/E2E` when
`Connection.RunAsyncInThrift` is both on and off
---
csharp/src/Drivers/Databricks/BaseDatabricksReader.cs | 2 +-
csharp/src/Drivers/Databricks/DatabricksConnection.cs | 18 ++++++++++++++++++
csharp/src/Drivers/Databricks/DatabricksParameters.cs | 6 ++++++
csharp/src/Drivers/Databricks/DatabricksStatement.cs | 4 ++++
csharp/test/Drivers/Databricks/E2E/StringValueTests.cs | 2 +-
5 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
b/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
index ed47424e8..8de9889b7 100644
--- a/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
+++ b/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
@@ -39,7 +39,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
this.schema = schema;
this.isLz4Compressed = isLz4Compressed;
this.statement = statement;
- if (statement.DirectResults != null &&
!statement.DirectResults.ResultSet.HasMoreRows)
+ if (statement.DirectResults?.ResultSet != null &&
!statement.DirectResults.ResultSet.HasMoreRows)
{
return;
}
diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index a28099939..de8ac098a 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -43,6 +43,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
private bool _enableDirectResults = true;
private bool _enableMultipleCatalogSupport = true;
private bool _enablePKFK = true;
+ private bool _runAsyncInThrift = false;
internal static TSparkGetDirectResults defaultGetDirectResults = new()
{
@@ -165,6 +166,18 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
}
}
+ if
(Properties.TryGetValue(DatabricksParameters.EnableRunAsyncInThriftOp, out
string? enableRunAsyncInThriftStr))
+ {
+ if (bool.TryParse(enableRunAsyncInThriftStr, out bool
enableRunAsyncInThrift))
+ {
+ _runAsyncInThrift = enableRunAsyncInThrift;
+ }
+ else
+ {
+ throw new ArgumentException($"Parameter
'{DatabricksParameters.EnableRunAsyncInThriftOp}' value
'{enableRunAsyncInThriftStr}' could not be parsed. Valid values are 'true' and
'false'.");
+ }
+ }
+
if (Properties.TryGetValue(DatabricksParameters.MaxBytesPerFile,
out string? maxBytesPerFileStr))
{
if (!long.TryParse(maxBytesPerFileStr, out long
maxBytesPerFileValue))
@@ -290,6 +303,11 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
/// </summary>
public bool EnablePKFK => _enablePKFK;
+ /// <summary>
+ /// Enable RunAsync flag in Thrift Operation
+ /// </summary>
+ public bool RunAsyncInThrift => _runAsyncInThrift;
+
/// <summary>
/// Gets a value indicating whether to retry requests that receive a
503 response with a Retry-After header.
/// </summary>
diff --git a/csharp/src/Drivers/Databricks/DatabricksParameters.cs
b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
index 861b7b324..2166fa43f 100644
--- a/csharp/src/Drivers/Databricks/DatabricksParameters.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
@@ -180,6 +180,12 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
/// </summary>
public const string UseDescTableExtended =
"adbc.databricks.use_desc_table_extended";
+ /// <summary>
+ /// Whether to enable RunAsync flag in Thrift operation
+ /// Default value is false if not specified.
+ /// </summary>
+ public const string EnableRunAsyncInThriftOp =
"adbc.databricks.enable_run_async_thrift";
+
/// <summary>
/// Whether to propagate trace parent headers in HTTP requests.
/// Default value is true if not specified.
diff --git a/csharp/src/Drivers/Databricks/DatabricksStatement.cs
b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
index 31e529ed3..d95677384 100644
--- a/csharp/src/Drivers/Databricks/DatabricksStatement.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
@@ -42,6 +42,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
private long maxBytesPerFile;
private bool enableMultipleCatalogSupport;
private bool enablePKFK;
+ private bool runAsyncInThrift;
public DatabricksStatement(DatabricksConnection connection)
: base(connection)
@@ -63,6 +64,8 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
maxBytesPerFile = connection.MaxBytesPerFile;
enableMultipleCatalogSupport =
connection.EnableMultipleCatalogSupport;
enablePKFK = connection.EnablePKFK;
+
+ runAsyncInThrift = connection.RunAsyncInThrift;
}
protected override void SetStatementProperties(TExecuteStatementReq
statement)
@@ -73,6 +76,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
statement.CanDownloadResult = useCloudFetch;
statement.CanDecompressLZ4Result = canDecompressLz4;
statement.MaxBytesPerFile = maxBytesPerFile;
+ statement.RunAsync = runAsyncInThrift;
if (Connection.AreResultsAvailableDirectly)
{
diff --git a/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
b/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
index 52890a518..cba53a279 100644
--- a/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
+++ b/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
@@ -62,7 +62,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
}
[SkippableTheory]
- [InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT",
"DeltaInvariantViolationException" }, "22001")]
+ [InlineData("String whose length is too long for VARCHAR(10).", new
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT" }, "22001")]
public async Task TestVarcharExceptionDataDatabricks(string value,
string[] expectedTexts, string? expectedSqlState)
{
await base.TestVarcharExceptionData(value, expectedTexts,
expectedSqlState);