This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 5338294fd feat(csharp/test/Drivers/Databricks): Enable RunAsync option 
in TExecuteStatementReq (#3171)
5338294fd is described below

commit 5338294fd3e4971c438e6f03d97915ed7bdc178c
Author: Jacky Hu <[email protected]>
AuthorDate: Wed Jul 16 15:48:05 2025 -0700

    feat(csharp/test/Drivers/Databricks): Enable RunAsync option in 
TExecuteStatementReq (#3171)
    
    ## Motivation
    
    Databricks adds a `RunAsync` option (default is `false`) for all the
    thrift operations. When it is set to `true`, the operation runs
    asynchronously at the backend and its status can be polled by calling
    getStatus; this helps the Databricks backend better manage capacity and
    load from client requests. Furthermore, it can avoid unnecessary retries
    of a thrift operation (e.g. TExecuteStatementReq) when the warehouse is
    stopped or unavailable: the backend returns a 503 error when `RunAsync`
    is `false`, and the client has to retry the thrift operation until the
    warehouse is up, which generates lots of queries with 503 errors in the
    Databricks query history and consumes more resources. When
    `RunAsync=true`, the server returns 200 with query state `PENDING`, and
    the client polls the status until the warehouse is up, which generates
    only one query in the query history.
    
    ## Change
    - Add a connection parameter `adbc.databricks.enable_run_async_thrift`,
    default is `false` (it will be changed to `true` later)
    - Set `RunAsync` of `TExecuteStatementReq` with the above connection
    parameter (`RunAsyncInThrift`) in
    `DatabricksStatement:SetStatementProperties`
    - Fix a bug in `BaseDatabricksReader` by adding a `null` check on
    `statement.DirectResults.ResultSet`
    - Fix the case `TestVarcharExceptionDataDatabricks` in
    `StringValuesTest`: when the error is in `DirectResult.status` (the case
    for `RunAsync=true`), it throws the error with `DisplayMessage` (see
    
[here](https://github.com/apache/arrow-adbc/blob/main/csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs#L316))
    instead of `Message`, and `DisplayMessage` does not include some
    internal error info such as the backend's error exception class.
    
    
    ## Test
    - Run all the E2E tests under `csharp/test/Drivers/Databricks/E2E` with
    `Connection.RunAsyncInThrift` both on and off
---
 csharp/src/Drivers/Databricks/BaseDatabricksReader.cs  |  2 +-
 csharp/src/Drivers/Databricks/DatabricksConnection.cs  | 18 ++++++++++++++++++
 csharp/src/Drivers/Databricks/DatabricksParameters.cs  |  6 ++++++
 csharp/src/Drivers/Databricks/DatabricksStatement.cs   |  4 ++++
 csharp/test/Drivers/Databricks/E2E/StringValueTests.cs |  2 +-
 5 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs 
b/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
index ed47424e8..8de9889b7 100644
--- a/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
+++ b/csharp/src/Drivers/Databricks/BaseDatabricksReader.cs
@@ -39,7 +39,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
             this.schema = schema;
             this.isLz4Compressed = isLz4Compressed;
             this.statement = statement;
-            if (statement.DirectResults != null && 
!statement.DirectResults.ResultSet.HasMoreRows)
+            if (statement.DirectResults?.ResultSet != null && 
!statement.DirectResults.ResultSet.HasMoreRows)
             {
                 return;
             }
diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs 
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index a28099939..de8ac098a 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -43,6 +43,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         private bool _enableDirectResults = true;
         private bool _enableMultipleCatalogSupport = true;
         private bool _enablePKFK = true;
+        private bool _runAsyncInThrift = false;
 
         internal static TSparkGetDirectResults defaultGetDirectResults = new()
         {
@@ -165,6 +166,18 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
                 }
             }
 
+            if 
(Properties.TryGetValue(DatabricksParameters.EnableRunAsyncInThriftOp, out 
string? enableRunAsyncInThriftStr))
+            {
+                if (bool.TryParse(enableRunAsyncInThriftStr, out bool 
enableRunAsyncInThrift))
+                {
+                    _runAsyncInThrift = enableRunAsyncInThrift;
+                }
+                else
+                {
+                    throw new ArgumentException($"Parameter 
'{DatabricksParameters.EnableRunAsyncInThriftOp}' value 
'{enableRunAsyncInThriftStr}' could not be parsed. Valid values are 'true' and 
'false'.");
+                }
+            }
+
             if (Properties.TryGetValue(DatabricksParameters.MaxBytesPerFile, 
out string? maxBytesPerFileStr))
             {
                 if (!long.TryParse(maxBytesPerFileStr, out long 
maxBytesPerFileValue))
@@ -290,6 +303,11 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         /// </summary>
         public bool EnablePKFK => _enablePKFK;
 
+        /// <summary>
+        /// Enable RunAsync flag in Thrift Operation
+        /// </summary>
+        public bool RunAsyncInThrift => _runAsyncInThrift;
+
         /// <summary>
         /// Gets a value indicating whether to retry requests that receive a 
503 response with a Retry-After header.
         /// </summary>
diff --git a/csharp/src/Drivers/Databricks/DatabricksParameters.cs 
b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
index 861b7b324..2166fa43f 100644
--- a/csharp/src/Drivers/Databricks/DatabricksParameters.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksParameters.cs
@@ -180,6 +180,12 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         /// </summary>
         public const string UseDescTableExtended = 
"adbc.databricks.use_desc_table_extended";
 
+        /// <summary>
+        /// Whether to enable RunAsync flag in Thrift operation
+        /// Default value is false if not specified.
+        /// </summary>
+        public const string EnableRunAsyncInThriftOp = 
"adbc.databricks.enable_run_async_thrift";
+
         /// <summary>
         /// Whether to propagate trace parent headers in HTTP requests.
         /// Default value is true if not specified.
diff --git a/csharp/src/Drivers/Databricks/DatabricksStatement.cs 
b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
index 31e529ed3..d95677384 100644
--- a/csharp/src/Drivers/Databricks/DatabricksStatement.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksStatement.cs
@@ -42,6 +42,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
         private long maxBytesPerFile;
         private bool enableMultipleCatalogSupport;
         private bool enablePKFK;
+        private bool runAsyncInThrift;
 
         public DatabricksStatement(DatabricksConnection connection)
             : base(connection)
@@ -63,6 +64,8 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
             maxBytesPerFile = connection.MaxBytesPerFile;
             enableMultipleCatalogSupport = 
connection.EnableMultipleCatalogSupport;
             enablePKFK = connection.EnablePKFK;
+
+            runAsyncInThrift = connection.RunAsyncInThrift;
         }
 
         protected override void SetStatementProperties(TExecuteStatementReq 
statement)
@@ -73,6 +76,7 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
             statement.CanDownloadResult = useCloudFetch;
             statement.CanDecompressLZ4Result = canDecompressLz4;
             statement.MaxBytesPerFile = maxBytesPerFile;
+            statement.RunAsync = runAsyncInThrift;
 
             if (Connection.AreResultsAvailableDirectly)
             {
diff --git a/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs 
b/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
index 52890a518..cba53a279 100644
--- a/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
+++ b/csharp/test/Drivers/Databricks/E2E/StringValueTests.cs
@@ -62,7 +62,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
         }
 
         [SkippableTheory]
-        [InlineData("String whose length is too long for VARCHAR(10).", new 
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT", 
"DeltaInvariantViolationException" }, "22001")]
+        [InlineData("String whose length is too long for VARCHAR(10).", new 
string[] { "DELTA_EXCEED_CHAR_VARCHAR_LIMIT" }, "22001")]
         public async Task TestVarcharExceptionDataDatabricks(string value, 
string[] expectedTexts, string? expectedSqlState)
         {
             await base.TestVarcharExceptionData(value, expectedTexts, 
expectedSqlState);

Reply via email to