Hello,

I'm using R version 4.1.3 on Windows 10 and I'm having a problem with
memory usage.

Currently, I need to use the arrow and dplyr libraries in a program. When I
compare the memory usage reported by the Windows Task Manager with the value
returned by the memory.size(max=F) function, the Task Manager figure
(243.5 MB of RAM, see <https://i.stack.imgur.com/nlWnL.png>) is much larger
than the one given by memory.size(max=F) (75.77 MB).

This happens even though I delete the objects I created with rm() and then
call the gc() function to recover the memory used by those objects.

Attached is the R code, with and without output, that I used to present my
problem.

Do you think this memory difference is normal? Could it be caused by the
libraries used and/or by bad practices in using the R language?

I'd like to understand why there's a difference in memory used between the
Windows task manager and R's memory.size(max=F) function.

Thank you for your help, and I remain at your disposal for any further
information you may require.

Best regards,
> gc(verbose = TRUE)
Garbage collection 2 = 0+0+2 (level 2) ... 
14.2 Mbytes of cons cells used (41%)
3.9 Mbytes of vectors used (6%)
         used (Mb) gc trigger (Mb) max used (Mb)
Ncells 264908 14.2     648748 34.7   401965 21.5
Vcells 500529  3.9    8388608 64.0  1671274 12.8
> 
> # basic memory
> memory.size(max=F)
[1] 28.78
> 
> library(arrow, warn.conflicts = FALSE)
> 
> # Memory after loading the arrow library with memory.size
> memory.size(max=F)
[1] 51.01
> 
> # bytes_allocated after loading the arrow library
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after loading the arrow library
> default_memory_pool()$max_memory
[1] 0
> 
> library(dplyr)

Attachement du package : ‘dplyr’

Les objets suivants sont masqués depuis ‘package:stats’:

    filter, lag

Les objets suivants sont masqués depuis ‘package:base’:

    intersect, setdiff, setequal, union

> 
> # Memory after loading the dplyr library with memory.size
> memory.size(max=F)
[1] 90.74
> 
> # bytes_allocated after loading the dplyr library
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after loading the dplyr library
> default_memory_pool()$max_memory
[1] 0
> 
> df <- data.frame(
+   col1 = rnorm(1000000),
+   col2 = rnorm(1000000),
+   col3 = runif(1000000),
+   col4 = sample(1:999, size = 1000000, replace = T),
+   col5 = sample(c("GroupA", "GroupB"), size = 1000000, replace = T),
+   col6 = sample(c("TypeA", "TypeB"), size = 1000000, replace = T)
+ )
> 
> # Memory after df object creation
> memory.size(max=F)
[1] 133.23
> 
> # bytes_allocated after df object creation
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after df object creation
> default_memory_pool()$max_memory
[1] 0
> 
> arrow::write_dataset(
+   df,
+   paste0(Sys.getenv("USERPROFILE"),"/ExProblemeGc"),
+   format = "parquet"
+ )
> 
> # Memory after writing to disk
> memory.size(max=F)
[1] 120.07
> 
> # bytes_allocated after writing to disk
> default_memory_pool()$bytes_allocated
[1] 19000128
> 
> # max_memory after writing to disk
> default_memory_pool()$max_memory
[1] 27126592
> 
> rm(df)
> 
> # Memory after deletion df
> memory.size(max=F)
[1] 120.07
> 
> # bytes_allocated after deletion df
> default_memory_pool()$bytes_allocated
[1] 19000128
> 
> # max_memory after deletion df
> default_memory_pool()$max_memory
[1] 27126592
> 
> gc(verbose = TRUE)
Garbage collection 15 = 9+2+4 (level 2) ... 
45.0 Mbytes of cons cells used (61%)
38.0 Mbytes of vectors used (49%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  842008   45    1387691 74.2  1387691 74.2
Vcells 4975717   38   10146329 77.5  8388601 64.0
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 101.29
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 27126592
> 
> gc(verbose = TRUE)
Garbage collection 16 = 9+2+5 (level 2) ... 
45.0 Mbytes of cons cells used (61%)
11.3 Mbytes of vectors used (15%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  841895 45.0    1387691 74.2  1387691 74.2
Vcells 1475542 11.3   10146329 77.5  8388601 64.0
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 74.35
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 27126592
> 
> ds <- arrow::open_dataset(paste0(Sys.getenv("USERPROFILE"),"/ExProblemeGc"))
> 
> # Memory after ds creation
> memory.size(max=F)
[1] 79.01
> 
> # bytes_allocated after ds creation
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after ds creation
> default_memory_pool()$max_memory
[1] 27126592
> 
> req <-
+   ds %>%
+   collect()
> 
> # Memory after req creation
> memory.size(max=F)
[1] 84.46
> 
> # bytes_allocated after req creation
> default_memory_pool()$bytes_allocated
[1] 47504192
> 
> # max_memory after req creation
> default_memory_pool()$max_memory
[1] 83176320
> 
> rm(req)
> 
> # Memory after deletion req
> memory.size(max=F)
[1] 84.47
> 
> # bytes_allocated after deletion req
> default_memory_pool()$bytes_allocated
[1] 47504192
> 
> # max_memory after deletion req
> default_memory_pool()$max_memory
[1] 83176320
> 
> gc(verbose = TRUE)
Garbage collection 17 = 9+2+6 (level 2) ... 
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  927153 49.6    1792975 95.8  1387691 74.2
Vcells 1627339 12.5   10146329 77.5  8388601 64.0
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
> 
> gc(verbose = TRUE)
Garbage collection 18 = 9+2+7 (level 2) ... 
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  927081 49.6    1792975 95.8  1387691 74.2
Vcells 1627219 12.5   10146329 77.5  8388601 64.0
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
> 
> rm(ds)
> 
> # Memory after deletion ds
> memory.size(max=F)
[1] 75.8
> 
> # bytes_allocated after deletion ds
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after deletion ds
> default_memory_pool()$max_memory
[1] 83176320
> 
> gc(verbose = TRUE)
Garbage collection 19 = 9+2+8 (level 2) ... 
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  926997 49.6    1792975 95.8  1387691 74.2
Vcells 1627193 12.5   10146329 77.5  8388601 64.0
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
> 
> gc(verbose = TRUE)
Garbage collection 20 = 9+2+9 (level 2) ... 
49.6 Mbytes of cons cells used (52%)
12.5 Mbytes of vectors used (16%)
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  926988 49.6    1792975 95.8  1387691 74.2
Vcells 1627178 12.5   10146329 77.5  8388601 64.0
> 
> # Memory after gc(verbose = TRUE)
> memory.size(max=F)
[1] 75.8
> 
> # bytes_allocated after gc(verbose = TRUE)
> default_memory_pool()$bytes_allocated
[1] 0
> 
> # max_memory after gc(verbose = TRUE)
> default_memory_pool()$max_memory
[1] 83176320
# Reproduction script: tracks the memory reported from inside R while a
# data frame is written to a Parquet dataset with arrow, read back, and the
# intermediate objects are removed and garbage-collected.
#
# After every step the script prints three values:
#   * memory.size(max = FALSE)               - R's own figure (Windows-only)
#   * default_memory_pool()$bytes_allocated  - arrow memory pool, current
#   * default_memory_pool()$max_memory       - arrow memory pool, peak
#
# The statement order is intentional: each measurement must directly follow
# the step it describes, so do not reorder or wrap the calls.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.

# ---- Baseline --------------------------------------------------------------
gc(verbose = TRUE)

# Baseline memory before any package is attached
memory.size(max = FALSE)

# ---- Attach arrow ----------------------------------------------------------
library(arrow, warn.conflicts = FALSE)

# Memory after loading the arrow library with memory.size
memory.size(max = FALSE)

# bytes_allocated after loading the arrow library
default_memory_pool()$bytes_allocated

# max_memory after loading the arrow library
default_memory_pool()$max_memory

# ---- Attach dplyr ----------------------------------------------------------
library(dplyr)

# Memory after loading the dplyr library with memory.size
memory.size(max = FALSE)

# bytes_allocated after loading the dplyr library
default_memory_pool()$bytes_allocated

# max_memory after loading the dplyr library
default_memory_pool()$max_memory

# ---- Create the test data frame (1,000,000 rows, 6 columns) ----------------
df <- data.frame(
  col1 = rnorm(1000000),
  col2 = rnorm(1000000),
  col3 = runif(1000000),
  col4 = sample(1:999, size = 1000000, replace = TRUE),
  col5 = sample(c("GroupA", "GroupB"), size = 1000000, replace = TRUE),
  col6 = sample(c("TypeA", "TypeB"), size = 1000000, replace = TRUE)
)

# Memory after df object creation
memory.size(max = FALSE)

# bytes_allocated after df object creation
default_memory_pool()$bytes_allocated

# max_memory after df object creation
default_memory_pool()$max_memory

# ---- Write df to disk as a Parquet dataset ---------------------------------
arrow::write_dataset(
  df,
  paste0(Sys.getenv("USERPROFILE"), "/ExProblemeGc"),
  format = "parquet"
)

# Memory after writing to disk
memory.size(max = FALSE)

# bytes_allocated after writing to disk
default_memory_pool()$bytes_allocated

# max_memory after writing to disk
default_memory_pool()$max_memory

# ---- Remove df (no garbage collection yet) ---------------------------------
rm(df)

# Memory after deletion df
memory.size(max = FALSE)

# bytes_allocated after deletion df
default_memory_pool()$bytes_allocated

# max_memory after deletion df
default_memory_pool()$max_memory

# ---- First gc() after removing df ------------------------------------------
gc(verbose = TRUE)

# Memory after the first gc(verbose = TRUE)
memory.size(max = FALSE)

# bytes_allocated after the first gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the first gc(verbose = TRUE)
default_memory_pool()$max_memory

# ---- Second gc() after removing df -----------------------------------------
gc(verbose = TRUE)

# Memory after the second gc(verbose = TRUE)
memory.size(max = FALSE)

# bytes_allocated after the second gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the second gc(verbose = TRUE)
default_memory_pool()$max_memory

# ---- Open the dataset that was written above -------------------------------
ds <- arrow::open_dataset(paste0(Sys.getenv("USERPROFILE"), "/ExProblemeGc"))

# Memory after ds creation
memory.size(max = FALSE)

# bytes_allocated after ds creation
default_memory_pool()$bytes_allocated

# max_memory after ds creation
default_memory_pool()$max_memory

# ---- Collect the whole dataset into req ------------------------------------
req <-
  ds %>%
  collect()

# Memory after req creation
memory.size(max = FALSE)

# bytes_allocated after req creation
default_memory_pool()$bytes_allocated

# max_memory after req creation
default_memory_pool()$max_memory

# ---- Remove req (no garbage collection yet) --------------------------------
rm(req)

# Memory after deletion req
memory.size(max = FALSE)

# bytes_allocated after deletion req
default_memory_pool()$bytes_allocated

# max_memory after deletion req
default_memory_pool()$max_memory

# ---- First gc() after removing req -----------------------------------------
gc(verbose = TRUE)

# Memory after the first gc(verbose = TRUE)
memory.size(max = FALSE)

# bytes_allocated after the first gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the first gc(verbose = TRUE)
default_memory_pool()$max_memory

# ---- Second gc() after removing req ----------------------------------------
# Note: in this step the arrow pool is queried before memory.size().
gc(verbose = TRUE)

# bytes_allocated after the second gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the second gc(verbose = TRUE)
default_memory_pool()$max_memory

# Memory after the second gc(verbose = TRUE)
memory.size(max = FALSE)

# ---- Remove ds (no garbage collection yet) ---------------------------------
rm(ds)

# Memory after deletion ds
memory.size(max = FALSE)

# bytes_allocated after deletion ds
default_memory_pool()$bytes_allocated

# max_memory after deletion ds
default_memory_pool()$max_memory

# ---- First gc() after removing ds ------------------------------------------
gc(verbose = TRUE)

# Memory after the first gc(verbose = TRUE)
memory.size(max = FALSE)

# bytes_allocated after the first gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the first gc(verbose = TRUE)
default_memory_pool()$max_memory

# ---- Second gc() after removing ds -----------------------------------------
gc(verbose = TRUE)

# Memory after the second gc(verbose = TRUE)
memory.size(max = FALSE)

# bytes_allocated after the second gc(verbose = TRUE)
default_memory_pool()$bytes_allocated

# max_memory after the second gc(verbose = TRUE)
default_memory_pool()$max_memory
______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to