Hello,
I'm using R version 4.1.3 on Windows 10 and I'm having a problem with
memory usage.
Currently, I need to use the arrow and dplyr libraries in a program. When I
compare the memory reported by the Windows Task Manager with the value
returned by the memory.size(max=F) function, the Task Manager figure is much
larger: 243.5 MB of RAM <https://i.stack.imgur.com/nlWnL.png>, versus
75.77 MB from memory.size(max=F).
This happens even though I delete the objects I created with rm() and then
call the gc() function to recover the memory they used.
Attached is the R code, with and without output, that I used to present my
problem.
Do you think this memory difference is normal? Could it be caused by the
libraries used and/or by bad practices in using the R language?
I'd like to understand why there's a difference in memory used between the
Windows task manager and R's memory.size(max=F) function.
Thank you for your help, and I remain at your disposal for any further
information you may require.
Best regards,
> gc(verbose = TRUE)
Garbage collection 2 = 0+0+2 (level 2) ...
14.2 Mbytes of cons cells used (41%)
3.9 Mbytes of vectors used (6%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 264908 14.2 648748 34.7 401965 21.5
Vcells 500529 3.9 8388608 64.0 1671274 12.8
>
> # basic memory
> memory.size(max=F)
[1] 28.78
>
> library(arrow, warn.conflicts = FALSE)
>
> # Memory after loading the arrow library with memory.size
> memory.size(max=F)
[1] 51.01
>
> # bytes_allocated after loading the arrow library
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after loading the arrow library
> default_memory_pool()$max_memory
[1] 0
>
> library(dplyr)
Attachement du package : ‘dplyr’
Les objets suivants sont masqués depuis ‘package:stats’:
filter, lag
Les objets suivants sont masqués depuis ‘package:base’:
intersect, setdiff, setequal, union
>
> # Memory after loading the dplyr library with memory.size
> memory.size(max=F)
[1] 90.74
>
> # bytes_allocated after loading the dplyr library
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after loading the dplyr library
> default_memory_pool()$max_memory
[1] 0
>
> df <- data.frame(
+ col1 = rnorm(1000000),
+ col2 = rnorm(1000000),
+ col3 = runif(1000000),
+ col4 = sample(1:999, size = 1000000, replace = T),
+ col5 = sample(c("GroupA", "GroupB"), size = 1000000, replace = T),
+ col6 = sample(c("TypeA", "TypeB"), size = 1000000, replace = T)
+ )
>
> # Memory after df object creation
> memory.size(max=F)
[1] 133.23
>
> # bytes_allocated after df object creation
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after df object creation
> default_memory_pool()$max_memory
[1] 0
>
> arrow::write_dataset(
+ df,
+ paste0(Sys.getenv("USERPROFILE"),"/ExProblemeGc"),
+ format = "parquet"
+ )
>
> # Memory after writing to disk
> memory.size(max=F)
[1] 120.07
>
> # bytes_allocated after writing to disk
> default_memory_pool()$bytes_allocated
[1] 19000128
>
> # max_memory after writing to disk
> default_memory_pool()$max_memory
[1] 27126592
>
> rm(df)
>
> # Memory after deletion df
> memory.size(max=F)
[1] 120.07
>
> # bytes_allocated after deletion df
> default_memory_pool()$bytes_allocated
[1] 19000128
>
> # max_memory after deletion df
> default_memory_pool()$max_memory
[1] 27126592
>
> gc(verbose = TRUE)
Garbage collection 15 = 9+2+4 (level 2) ...
45.0 Mbytes of cons cells used (61%)
38.0 Mbytes of vectors used (49%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842008 45 1387691 74.2 1387691 74.2
Vcells 4975717 38 10146329 77.5 8388601 64.0
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 101.29
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 27126592
>
> gc(verbose = TRUE)
Garbage collection 16 = 9+2+5 (level 2) ...
45.0 Mbytes of cons cells used (61%)
11.3 Mbytes of vectors used (15%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 841895 45.0 1387691 74.2 1387691 74.2
Vcells 1475542 11.3 10146329 77.5 8388601 64.0
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 74.35
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 27126592
>
> ds <- arrow::open_dataset(paste0(Sys.getenv("USERPROFILE"),"/ExProblemeGc"))
>
> # Memory after ds creation
> memory.size(max=F)
[1] 79.01
>
> # bytes_allocated after ds creation
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after ds creation
> default_memory_pool()$max_memory
[1] 27126592
>
> req <-
+ ds %>%
+ collect()
>
> # Memory after req creation
> memory.size(max=F)
[1] 84.46
>
> # bytes_allocated after req creation
> default_memory_pool()$bytes_allocated
[1] 47504192
>
> # max_memory after req creation
> default_memory_pool()$max_memory
[1] 83176320
>
> rm(req)
>
> # Memory after deletion req
> memory.size(max=F)
[1] 84.47
>
> # bytes_allocated after deletion req
> default_memory_pool()$bytes_allocated
[1] 47504192
>
> # max_memory after deletion req
> default_memory_pool()$max_memory
[1] 83176320
>
> gc(verbose = TRUE)
Garbage collection 17 = 9+2+6 (level 2) ...
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 927153 49.6 1792975 95.8 1387691 74.2
Vcells 1627339 12.5 10146329 77.5 8388601 64.0
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
>
> gc(verbose = TRUE)
Garbage collection 18 = 9+2+7 (level 2) ...
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 927081 49.6 1792975 95.8 1387691 74.2
Vcells 1627219 12.5 10146329 77.5 8388601 64.0
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
>
> rm(ds)
>
> # Memory after deletion df
> memory.size(max=F)
[1] 75.8
>
> # bytes_allocated after deletion df
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after deletion df
> default_memory_pool()$max_memory
[1] 83176320
>
> gc(verbose = TRUE)
Garbage collection 19 = 9+2+8 (level 2) ...
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 926997 49.6 1792975 95.8 1387691 74.2
Vcells 1627193 12.5 10146329 77.5 8388601 64.0
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
>
> gc(verbose = TRUE)
Garbage collection 20 = 9+2+9 (level 2) ...
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 926988 49.6 1792975 95.8 1387691 74.2
Vcells 1627178 12.5 10146329 77.5 8388601 64.0
>
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
>
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
>
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
# Reproduction script: tracks R-side and Arrow-side memory while a data frame
# is written to a Parquet dataset, read back, and then released.
#
# Every measurement is deliberately left as a bare top-level call so that its
# value auto-prints, in the same order as the recorded console session.
#
# NOTE(review): memory.size() exists only on Windows, and from R 4.2.0 onwards
# it is reportedly a stub that no longer returns a real figure -- confirm
# against the R version in use before relying on these numbers.

# Number of rows in the synthetic data frame.
n_rows <- 1000000
# Directory the Parquet dataset is written to and later read back from.
# file.path() joins with "/", so this is the same path the script used before.
dataset_dir <- file.path(Sys.getenv("USERPROFILE"), "ExProblemeGc")

# ---- Baseline ---------------------------------------------------------------
gc(verbose = TRUE)
# Memory reported by R before any package is loaded
memory.size(max = FALSE)

# ---- After loading arrow ----------------------------------------------------
library(arrow, warn.conflicts = FALSE)
# Memory reported by R
memory.size(max = FALSE)
# Bytes currently allocated in Arrow's default memory pool
default_memory_pool()$bytes_allocated
# Peak bytes allocated in Arrow's default memory pool
default_memory_pool()$max_memory

# ---- After loading dplyr ----------------------------------------------------
library(dplyr)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After creating df ------------------------------------------------------
df <- data.frame(
  col1 = rnorm(n_rows),
  col2 = rnorm(n_rows),
  col3 = runif(n_rows),
  col4 = sample(1:999, size = n_rows, replace = TRUE),
  col5 = sample(c("GroupA", "GroupB"), size = n_rows, replace = TRUE),
  col6 = sample(c("TypeA", "TypeB"), size = n_rows, replace = TRUE)
)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After writing df to disk as a Parquet dataset --------------------------
arrow::write_dataset(df, dataset_dir, format = "parquet")
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After rm(df), before any garbage collection ----------------------------
rm(df)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the first gc() following rm(df) ----------------------------------
gc(verbose = TRUE)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the second gc() following rm(df) ---------------------------------
gc(verbose = TRUE)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After opening the dataset (ds) -----------------------------------------
ds <- arrow::open_dataset(dataset_dir)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After collecting the dataset into req ----------------------------------
req <- ds %>%
  collect()
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After rm(req), before any garbage collection ---------------------------
rm(req)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the first gc() following rm(req) ---------------------------------
gc(verbose = TRUE)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the second gc() following rm(req) --------------------------------
# The Arrow pool figures are printed before memory.size() here, matching the
# order of the recorded session.
gc(verbose = TRUE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory
memory.size(max = FALSE)

# ---- After rm(ds), before any garbage collection ----------------------------
rm(ds)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the first gc() following rm(ds) ----------------------------------
gc(verbose = TRUE)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory

# ---- After the second gc() following rm(ds) ---------------------------------
gc(verbose = TRUE)
memory.size(max = FALSE)
default_memory_pool()$bytes_allocated
default_memory_pool()$max_memory
______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.