paleolimbot opened a new issue, #40217:
URL: https://github.com/apache/arrow/issues/40217
### Describe the bug, including details regarding any error messages, version, and platform.

From the performance report for #40197, apparently we get:

```
 *** caught segfault ***
address 0x3d, cause 'memory not mapped'

Traceback:
 1: RecordBatchReader__UnsafeDelete(self)
 2: reader$.unsafe_delete()
 3: as_arrow_table.arrow_dplyr_query(x)
```

for the job:

```
engine=arrow, format=parquet, language=R, memory_map=False, query_id=TPCH-21, scale_factor=10
```

`RecordBatchReader__UnsafeDelete()` is something I added, but I would have to look into its use again to ensure it is not getting called twice. I believe it was introduced to ensure that open files were closed promptly, since leaving them open caused problems on Windows. (A sketch of the kind of guard that would make a second call harmless is at the end of this issue.)

Also, there was a huge regression last July:

<img width="443" alt="Screenshot 2024-02-23 at 4 32 28 PM" src="https://github.com/apache/arrow/assets/10995762/b6170627-1c00-48ad-bb1c-38a5a37914ca">

https://conbench.ursa.dev/benchmark-results/065d8d9e6ab17d1e8000cb6422edfa64/

Full dump:

<details>

```
 *** caught segfault ***
address 0x3d, cause 'memory not mapped'

Traceback:
 1: RecordBatchReader__UnsafeDelete(self)
 2: reader$.unsafe_delete()
 3: as_arrow_table.arrow_dplyr_query(x)
 4: as_arrow_table(x)
 5: doTryCatch(return(expr), name, parentenv, handler)
 6: tryCatchOne(expr, names, parentenv, handlers[[1L]])
 7: tryCatchList(expr, classes, parentenv, handlers)
 8: tryCatch(as_arrow_table(x), error = function(e, call = caller_env(n = 4)) { augment_io_error_msg(e, call, schema = schema())})
 9: compute.arrow_dplyr_query(x)
10: collect.arrow_dplyr_query(.)
11: collect_func(.)
12: input_func("supplier") %>% inner_join(line_items, by = c(s_suppkey = "l_suppkey")) %>% filter(l_receiptdate > l_commitdate) %>% inner_join(input_func("nation"), by = c(s_nationkey = "n_nationkey")) %>% filter(n_name == "SAUDI ARABIA") %>% group_by(s_name) %>% summarise(numwait = n()) %>% ungroup() %>% arrange(desc(numwait), s_name) %>% head(100) %>% collect_func()
13: query(input_func, collect_func, con)
14: eval(bm$run, envir = ctx)
15: eval(bm$run, envir = ctx)
16: eval(expr, p)
17: eval.parent(...)
18: as_bench_time(.Call(system_time_, substitute(expr), parent.frame()))
19: stats::setNames(as_bench_time(.Call(system_time_, substitute(expr), parent.frame())), c("process", "real"))
20: bench::bench_time(eval.parent(...))
21: eval(expr, p)
22: eval.parent(expr)
23: with_profiling(profiling, { timings <- bench::bench_time(eval.parent(...))})
24: force(expr)
25: with_gc_info({ prof_file <- with_profiling(profiling, { timings <- bench::bench_time(eval.parent(...)) })})
26: measure(eval(bm$run, envir = ctx), profiling = profiling, drop_caches = drop_caches)
27: run_iteration(bm = bm, ctx = ctx, profiling = profiling, drop_caches = global_params[["drop_caches"]])
28: withCallingHandlers({ results[[i]] <- run_iteration(bm = bm, ctx = ctx, profiling = profiling, drop_caches = global_params[["drop_caches"]])}, warning = function(w) { warnings <<- c(warnings, list(list(warning = as.character(w), stack_trace = vapply(traceback(3), function(x) paste(x, collapse = "\n"), character(1)))))})
29: doTryCatch(return(expr), name, parentenv, handler)
30: tryCatchOne(expr, names, parentenv, handlers[[1L]])
31: tryCatchList(expr, classes, parentenv, handlers)
32: tryCatch(withCallingHandlers({ results[[i]] <- run_iteration(bm = bm, ctx = ctx, profiling = profiling, drop_caches = global_params[["drop_caches"]])}, warning = function(w) { warnings <<- c(warnings, list(list(warning = as.character(w), stack_trace = vapply(traceback(3), function(x) paste(x, collapse = "\n"), character(1)))))}), error = function(e) { error <<- list(error = as.character(e), stack_trace = vapply(traceback(3), function(x) paste(x, collapse = "\n"), character(1)))})
33: run_bm(format = "parquet", scale_factor = 10, engine = "arrow", memory_map = FALSE, query_id = 21, bm = structure(list(name = "tpch", setup = function(engine = "arrow", query_id = 1:22, format = c("native", "parquet"), scale_factor = c(1, 10), memory_map = FALSE, output = "data_frame", chunk_size = NULL) { engine <- match.arg(engine, c("arrow", "duckdb", "duckdb_sql", "dplyr")) format <- match.arg(format, c("parquet", "feather", "native")) stopifnot(`query_id must be an int` = query_id%%1 == 0, `query_id must 1-22` = query_id >= 1 & query_id <= 22) output <- match.arg(output, c("arrow_table", "data_frame")) library("dplyr", warn.conflicts = FALSE) collect_func <- collect if (output == "data_frame") { collect_func <- collect } else if (output == "arrow_table") { collect_func <- compute } con <- NULL if (engine %in% c("duckdb", "duckdb_sql")) { con <- DBI::dbConnect(duckdb::duckdb()) DBI::dbExecute(con, paste0("PRAGMA threads=", getOption("Ncpus"))) } BenchEnvironment(input_func = get_input_func(engine = engine, scale_factor = scale_factor, query_id = query_id, format = format, con = con, memory_map = memory_map, chunk_size = chunk_size), query = get_query_func(query_id, engine), engine = engine, con = con, scale_factor = scale_factor, query_id = query_id, collect_func = collect_func) }, before_each = quote({ result <- NULL }), run = quote({ result <- query(input_func, collect_func, con) }), after_each = quote({ if (scale_factor %in% c(0.01, 0.1, 1, 10)) { answer <- tpch_answer(scale_factor, query_id) result <- dplyr::as_tibble(result) all_equal_out <- waldo::compare(result, answer, tolerance = 0.01) if (length(all_equal_out) != 0) { warning(paste0("\n", all_equal_out, "\n")) stop("The answer does not match") } } else { warning("There is no validation for scale_factors other than 0.01, 0.1, 1, and 10. Be careful with these results!") } result <- NULL }), teardown = quote({ if (!is.null(con)) { DBI::dbDisconnect(con, shutdown = TRUE) } }), valid_params = function(params) { drop <- (params$engine != "arrow" & params$format == "feather") | (params$engine != "arrow" & params$output == "arrow_table") | (params$engine != "arrow" & params$memory_map == TRUE) | (params$engine == "dplyr" & params$format == "native") params[!drop, ] }, case_version = function(params) NULL, batch_id_fun = function(params) { batch_id <- uuid() paste0(batch_id, "-", params$scale_factor, substr(params$format, 1, 1)) }, tags_fun = function(params) { params$query_id <- sprintf("TPCH-%02d", params$query_id) if (!is.null(params$output) && params$output == "data_frame") { params$output <- NULL } params }, packages_used = function(params) { c(params$engine, "dplyr", "lubridate") }), class = "Benchmark"), n_iter = 1, batch_id = NULL, profiling = FALSE, global_params = list(cpu_count = NULL, lib_path = "latest"), run_id = NULL, run_name = NULL, run_reason = NULL)
An irrecoverable exception occurred. R is aborting now ...
Segmentation fault (core dumped)
```

</details>

### Component(s)

R
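If the crash really is a second call into the deleter, one way to make it harmless is to clear the stored pointer on the first call. A minimal sketch, assuming the R-level reader holds a heap-allocated `std::shared_ptr` behind an external pointer; the names and the double-pointer plumbing here are illustrative, not the actual arrow/r binding code:

```cpp
#include <memory>

class RecordBatchReader;  // stands in for arrow::RecordBatchReader

// Hypothetical idempotent unsafe delete: release the reader on the first
// call, then clear the slot so a repeated call (e.g. a second
// reader$.unsafe_delete()) becomes a no-op instead of touching freed memory.
void UnsafeDeleteReader(std::shared_ptr<RecordBatchReader>** slot) {
  if (slot == nullptr || *slot == nullptr) {
    return;  // already deleted: nothing left to release
  }
  delete *slot;     // drops the reference, closing any open files promptly
  *slot = nullptr;  // mark the slot empty for any later call
}
```

An equivalent guard at the R level (checking for an already-cleared pointer before calling into C++) would work too; either way, the second call has to become a no-op.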