paleolimbot opened a new issue, #1921:
URL: https://github.com/apache/arrow-adbc/issues/1921

   ### What happened?
   
   When bulk inserting a very large array into we get an error that does not 
occur when bulk inserting many small arrays.
   
   ### How can we reproduce the bug?
   
   From R:
   
   ```r
   library(adbcdrivermanager)
   
   # docker compose up postgres-test
   test_uri <- 
"postgresql://localhost:5432/postgres?user=postgres&password=password"
   
   db <- adbc_database_init(adbcpostgresql::adbcpostgresql(), uri = test_uri)
   
   # Many arrays
   arr <- nanoarrow::as_nanoarrow_array(nycflights13::flights)
   stream <- nanoarrow::basic_array_stream(rep(list(arr), 100))
   write_adbc(stream, db, "flights")
   db |> execute_adbc("DROP TABLE flights")
   
   # One big array
   arr <- nanoarrow::as_nanoarrow_array(
     vctrs::vec_rep(nycflights13::flights, 100)
   )
   stream <- nanoarrow::basic_array_stream(list(arr))
   write_adbc(stream, db, "flights")
   #> Error in adbc_statement_execute_query(stmt): IO: Error writing tuple 
field data: server closed the connection unexpectedly
   #>  This probably means the server terminated abnormally
   #>  before or while processing the request.
   db |> execute_adbc("DROP TABLE flights")
   ```
   
   I don't think this is a result of nanoarrow's array creation:
   
   ``` r
   library(nanoarrow)
   #> Warning: package 'nanoarrow' was built under R version 4.3.3
   library(arrow, warn.conflicts = FALSE)
   #> Warning: package 'arrow' was built under R version 4.3.3
   
   # Many arrays
   arr <- nanoarrow::as_nanoarrow_array(nycflights13::flights)
   stream <- nanoarrow::basic_array_stream(rep(list(arr), 100))
   tab <- arrow::as_arrow_table(stream)
   tab$ValidateFull()
   #> [1] TRUE
   
   # One big array
   arr <- nanoarrow::as_nanoarrow_array(
     vctrs::vec_rep(nycflights13::flights, 100)
   )
   stream <- nanoarrow::basic_array_stream(list(arr))
   tab2 <- arrow::as_arrow_table(stream)
   tab2$ValidateFull()
   #> [1] TRUE
   
   tab2$Equals(tab)
   #> [1] TRUE
   
   tab
   #> Table
   #> 33677600 rows x 19 columns
   #> $year <int32>
   #> $month <int32>
   #> $day <int32>
   #> $dep_time <int32>
   #> $sched_dep_time <int32>
   #> $dep_delay <double>
   #> $arr_time <int32>
   #> $sched_arr_time <int32>
   #> $arr_delay <double>
   #> $carrier <string>
   #> $flight <int32>
   #> $tailnum <string>
   #> $origin <string>
   #> $dest <string>
   #> $air_time <double>
   #> $distance <double>
   #> $hour <double>
   #> $minute <double>
   #> $time_hour <timestamp[us, tz=America/New_York]>
   ```
   
   
   ### Environment/Setup
   
   ```r
   sessioninfo::session_info()
   #> ─ Session info 
───────────────────────────────────────────────────────────────
   #>  setting  value
   #>  version  R version 4.3.1 (2023-06-16)
   #>  os       macOS Sonoma 14.5
   #>  system   aarch64, darwin20
   #>  ui       X11
   #>  language (EN)
   #>  collate  en_US.UTF-8
   #>  ctype    en_US.UTF-8
   #>  tz       America/Halifax
   #>  date     2024-06-16
   #>  pandoc   3.1.11 @ 
/Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via 
rmarkdown)
   #> 
   #> ─ Packages 
───────────────────────────────────────────────────────────────────
   #>  package           * version     date (UTC) lib source
   #>  adbcdrivermanager * 0.12.0.9000 2024-06-16 [1] local
   #>  arrow             * 16.1.0      2024-06-03 [1] 
https://apache.r-universe.dev (R 4.3.3)
   #>  assertthat          0.2.1       2019-03-21 [1] CRAN (R 4.3.0)
   #>  bit                 4.0.5       2022-11-15 [1] CRAN (R 4.3.0)
   #>  bit64               4.0.5       2020-08-30 [1] CRAN (R 4.3.0)
   #>  cli                 3.6.2       2023-12-11 [1] CRAN (R 4.3.1)
   #>  digest              0.6.34      2024-01-11 [1] CRAN (R 4.3.1)
   #>  evaluate            0.23        2023-11-01 [1] CRAN (R 4.3.1)
   #>  fastmap             1.1.1       2023-02-24 [1] CRAN (R 4.3.0)
   #>  fs                  1.6.3       2023-07-20 [1] CRAN (R 4.3.0)
   #>  glue                1.7.0       2024-01-09 [1] CRAN (R 4.3.1)
   #>  htmltools           0.5.7       2023-11-03 [1] CRAN (R 4.3.1)
   #>  knitr               1.45        2023-10-30 [1] CRAN (R 4.3.1)
   #>  lifecycle           1.0.4       2023-11-07 [1] CRAN (R 4.3.1)
   #>  magrittr            2.0.3       2022-03-30 [1] CRAN (R 4.3.0)
   #>  nanoarrow         * 0.5.0.1     2024-05-31 [1] CRAN (R 4.3.3)
   #>  purrr               1.0.2       2023-08-10 [1] CRAN (R 4.3.0)
   #>  R.cache             0.16.0      2022-07-21 [1] CRAN (R 4.3.0)
   #>  R.methodsS3         1.8.2       2022-06-13 [1] CRAN (R 4.3.0)
   #>  R.oo                1.26.0      2024-01-24 [1] CRAN (R 4.3.1)
   #>  R.utils             2.12.3      2023-11-18 [1] CRAN (R 4.3.1)
   #>  R6                  2.5.1       2021-08-19 [1] CRAN (R 4.3.0)
   #>  reprex              2.1.0       2024-01-11 [1] CRAN (R 4.3.1)
   #>  rlang               1.1.3       2024-01-10 [1] CRAN (R 4.3.1)
   #>  rmarkdown           2.26        2024-03-05 [1] CRAN (R 4.3.1)
   #>  rstudioapi          0.15.0      2023-07-07 [1] CRAN (R 4.3.0)
   #>  sessioninfo         1.2.2       2021-12-06 [1] CRAN (R 4.3.0)
   #>  styler              1.10.2      2023-08-29 [1] CRAN (R 4.3.0)
   #>  tidyselect          1.2.1       2024-03-11 [1] CRAN (R 4.3.1)
   #>  vctrs               0.6.5       2023-12-01 [1] CRAN (R 4.3.1)
   #>  withr               3.0.0       2024-01-16 [1] CRAN (R 4.3.1)
   #>  xfun                0.42        2024-02-08 [1] CRAN (R 4.3.1)
   #>  yaml                2.3.8       2023-12-11 [1] CRAN (R 4.3.1)
   #> 
   #>  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
   #> 
   #> 
──────────────────────────────────────────────────────────────────────────────
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to