Thanks a lot Rui. Loops make sense to me. I made one modification to your code. I have thousands of observation, so I would like to run it in parallel. This is my reproducible example:
# Make Data Frame for video actions between given times for user X DataVideoActionT <- function (userX, Time1, Time2, Time3){ #Get data for user X videoActionsX<-subset(videoLectureActions, username==userX) #Time1 = before first attempt videoActionsX_T1<-subset(videoActionsX, eventTimestamp<Time1) #Time2 = before best attemp videoActionsX_T2<-subset(videoActionsX, eventTimestamp<Time2 & eventTimestamp>Time1) #Time3= before last attemp videoActionsX_T3<-subset(videoActionsX, eventTimestamp<Time3 & eventTimestamp>Time1) error1 = sum(videoActionsX_T1$type==" error ") pause1 = sum(videoActionsX_T1$type==" pause ") play1 = sum(videoActionsX_T1$type==" play ") ratechange1 = sum(videoActionsX_T1$type==" ratechange ") seeked1 = sum(videoActionsX_T1$type==" seeked ") stalled1 = sum(videoActionsX_T1$type==" stalled ") error2 = sum(videoActionsX_T2$type==" error ") pause2 = sum(videoActionsX_T2$type==" pause ") play2 = sum(videoActionsX_T2$type==" play ") ratechange2 = sum(videoActionsX_T2$type==" ratechange ") seeked2 = sum(videoActionsX_T2$type==" seeked ") stalled2 = sum(videoActionsX_T2$type==" stalled ") error3 = sum(videoActionsX_T3$type==" error ") pause3 = sum(videoActionsX_T3$type==" pause ") play3 = sum(videoActionsX_T3$type==" play ") ratechange3 = sum(videoActionsX_T3$type==" ratechange ") seeked3 = sum(videoActionsX_T3$type==" seeked ") stalled3 = sum(videoActionsX_T3$type==" stalled ") data<-data.frame(anon_ID=userX, error1 = error1, pause1 = pause1, play1 = play1, ratechange1 = ratechange1, seeked1=seeked1, stalled1=stalled1, error2 = error2, pause2 = pause2, play2 = play2, ratechange2 = ratechange2, seeked2 =seeked2, stalled2 = stalled2, error3 = error3, pause3 = pause3, play3 = play3, ratechange3 = ratechange3, seeked3 = seeked3, stalled3 = stalled3) return(data) } videoLectureActions<-structure(list(username = c("exampleID1", "exampleID1", "exampleID1", "exampleID2", "exampleID2", "exampleID2", "exampleID3", "exampleID3", "exampleID3", "exampleID3"), currentTime = c("103.701247", "103.701247", "107.543877", "107.543877", "116.456507", "116.456507", "119.987188", "177.816693", "183.417124", "183.417124"), playbackRate = c("null", "null", "null", "null", "null", "null", "null", "null", "null", "null"), pause = c("true", "false", "true", "false", "true", "false", "true", "false", "true", "false"), error = c("null", "null", "null", "null", "null", "null", "null", "null", "null", "null"), networkState = c("1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), readyState = c("4", "4", "4", "4", "4", "4", "4", "4", "4", "4"), lectureID = c("exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", "exampleLectureID1"), eventTimestamp = c("2013-03-04 18:51:49", "2013-03-04 18:51:50", "2013-03-04 18:51:54", "2013-03-04 18:51:56", "2013-03-04 18:52:05", "2013-03-04 18:52:07", "2013-03-04 18:52:11", "2013-03-04 18:59:17", "2013-03-04 18:59:23", "2013-03-04 18:59:31" ), initTimestamp = c("2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 18:44:15"), type = c(" pause ", " play ", " pause ", " play ", " pause ", " play ", " pause ", " play ", " pause ", " play "), prevTime = c("103.701247 ", "103.701247 ", "107.543877 ", "107.543877 ", "116.456507 ", "116.456507 ", "119.987188 ", "177.816693 ", "183.417124 ", "183.417124 ")), .Names = c("username", "currentTime", "playbackRate", "pause", "error", "networkState", "readyState", "lectureID", "eventTimestamp", "initTimestamp", "type", "prevTime"), row.names = c(1L, 2L, 5L, 6L, 17L, 21L, 28L, 936L, 957L, 988L), class = "data.frame") data<-structure(list(anon_ID = c("exampleID1", "exampleID2", "exampleID3" ), maxGrade = c(10, 5, 10), firstGrade = c(10, 5, 8), lastGrade = c(10, 5, 10), total_submissions = c(1L, 1L, 3L), Time1 = structure(c(1361993741, 1362356090, 1362357401), class = c("POSIXct", "POSIXt"), tzone = ""), TimeM = structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct", "POSIXt"), tzone = ""), TimeL = structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("anon_ID", "maxGrade", "firstGrade", "lastGrade", "total_submissions", "Time1", "TimeM", "TimeL"), row.names = c(NA, 3L), class = "data.frame") library(foreach) library(doMC) registerDoMC(2) #change the 2 to your number of CPU cores n <- nrow(data) res <- list("vector", n) foreach(i=1:n, .verbose=FALSE, .combine=rbind) %do% { res[[i]] <- with(data, DataVideoActionT(anon_ID[i], Time1[i], TimeM[i], TimeL[i])) } test<-do.call(rbind, res) I have 2 questions. 1. How can I make foreach not print to the console? 2. I want to run this in parallel, I i change the %do% for %dopar% the code stop working. Instead of getting test with 3 observations and 19 variables I get a 2x1 character matrix Thanks! On Sun, Sep 1, 2013 at 3:00 PM, Rui Barradas <ruipbarra...@sapo.pt> wrote: > Hello, > > Your example doesn't really run, but for what I've seen, if your second > data frame is named dat2, something along the lines of > > n <- nrow(dat2) > res <- list("vector", n) > for(i in 1:n){ > res[[i]] <- with(dat2, DataVideoActionT(anon_ID[i], Time1[i], > TimeM[i], TimeL[i])) > } > > do.call(rbind, res) > > > Rui Barradas > > Em 01-09-2013 17:40, Ignacio Martinez escreveu: > >> I hope this reproduceble example helps understand what I'm trying to do. >> >> This is the function: >> >> # Make Data Frame for video actions between given times for user X >> DataVideoActionT <- function (userX, Time1, Time2, Time3){ >> #Get data for user X >> videoActionsX<-subset(**videoLectureActions, username==userX) >> #Time1 = before first attempt >> videoActionsX_T1<-subset(**videoActionsX, eventTimestamp<Time1) >> #Time2 = before best attemp >> videoActionsX_T2<-subset(**videoActionsX, eventTimestamp<Time2 & >> eventTimestamp>Time1) >> #Time3= before last attemp >> videoActionsX_T3<-subset(**videoActionsX, eventTimestamp<Time3 & >> eventTimestamp>Time1) >> >> error1 = sum(videoActionsX_T1$type==" error ") >> pause1 = sum(videoActionsX_T1$type==" pause ") >> play1 = sum(videoActionsX_T1$type==" play ") >> ratechange1 = sum(videoActionsX_T1$type==" ratechange ") >> seeked1 = sum(videoActionsX_T1$type==" seeked ") >> stalled1 = sum(videoActionsX_T1$type==" stalled ") >> >> error2 = sum(videoActionsX_T2$type==" error ") >> pause2 = sum(videoActionsX_T2$type==" pause ") >> play2 = sum(videoActionsX_T2$type==" play ") >> ratechange2 = sum(videoActionsX_T2$type==" ratechange ") >> seeked2 = sum(videoActionsX_T2$type==" seeked ") >> stalled2 = sum(videoActionsX_T2$type==" stalled ") >> >> error3 = sum(videoActionsX_T3$type==" error ") >> pause3 = sum(videoActionsX_T3$type==" pause ") >> play3 = sum(videoActionsX_T3$type==" play ") >> ratechange3 = sum(videoActionsX_T3$type==" ratechange ") >> seeked3 = sum(videoActionsX_T3$type==" seeked ") >> stalled3 = sum(videoActionsX_T3$type==" stalled ") >> >> data<-data.frame(anon_ID=**userX, >> error1 = error1, >> pause1 = pause1, >> play1 = play1, >> ratechange1 = ratechange1, >> seeked1=seeked1, >> stalled1=stalled1, >> error2 = error2, >> pause2 = pause2, >> play2 = play2, >> ratechange2 = ratechange2, >> seeked2 =seeked2, >> stalled2 = stalled2, >> error3 = error3, >> pause3 = pause3, >> play3 = play3, >> ratechange3 = ratechange3, >> seeked3 = seeked3, >> stalled3 = stalled3) >> return(data) >> } >> >> This is the videoActionsX dataframe: >> >> structure(list(username = c("exampleID1", "exampleID1", "exampleID1", >> "exampleID2", "exampleID2", "exampleID2", >> "exampleID3", "exampleID3", >> "exampleID3", "exampleID3"), currentTime = >> c("103.701247", "103.701247", >> >> "107.543877", "107.543877", "116.456507", "116.456507", "119.987188", >> >> "177.816693", "183.417124", "183.417124"), playbackRate = c("null", >> >> "null", "null", >> "null", "null", "null", "null", "null", "null", >> >> "null"), pause = >> c("true", "false", "true", "false", "true", >> >> >> "false", "true", "false", "true", "false"), error = c("null", >> >> >> "null", "null", >> "null", "null", "null", "null", "null", "null", >> >> >> "null"), >> networkState >> = c("1", "1", "1", "1", "1", "1", "1", >> >> >> >> "1", "1", "1"), readyState = c("4", "4", "4", "4", "4", "4", >> >> >> >> "4", "4", "4", "4"), lectureID = >> c("exampleLectureID1", "exampleLectureID1", >> >> >> >> >> "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", >> >> >> >> >> "exampleLectureID1", "exampleLectureID1", "exampleLectureID1", >> >> >> >> >> "exampleLectureID1", "exampleLectureID1"), eventTimestamp = c("2013-03-04 >> 18:51:49", >> >> >> >> >> "2013-03-04 >> 18:51:50", "2013-03-04 18:51:54", "2013-03-04 18:51:56", >> >> >> >> >> "2013-03-04 >> 18:52:05", "2013-03-04 18:52:07", "2013-03-04 18:52:11", >> >> >> >> >> "2013-03-04 >> 18:59:17", "2013-03-04 18:59:23", "2013-03-04 18:59:31" >> >> >> >> ), >> initTimestamp = c("2013-03-04 18:44:15", "2013-03-04 18:44:15", >> >> >> >> >> "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 >> 18:44:15", >> >> >> >> >> "2013-03-04 18:44:15", "2013-03-04 18:44:15", "2013-03-04 >> 18:44:15", >> >> >> >> >> "2013-03-04 18:44:15", "2013-03-04 18:44:15"), type = c(" >> pause ", >> >> >> >> >> " >> play ", " pause ", " play ", " pause ", " play ", " pause ", >> >> >> >> >> " >> play ", " pause ", " play "), prevTime = c("103.701247 ", "103.701247 ", >> >> >> >> >> >> "107.543877 ", "107.543877 ", >> "116.456507 ", "116.456507 ", "119.987188 ", >> >> >> >> >> >> "177.816693 ", "183.417124 ", >> "183.417124 ")), .Names = c("username", >> >> >> >> >> >> >> "currentTime", "playbackRate", "pause", "error", >> "networkState", >> >> >> >> >> >> >> "readyState", "lectureID", "eventTimestamp", >> "initTimestamp", >> >> >> >> >> >> >> "type", "prevTime"), row.names = c(1L, 2L, 5L, 6L, >> 17L, 21L, >> >> >> >> >> >> >> 28L, 936L, 957L, >> 988L), class = "data.frame") >> >> >> >> But with over 2000 observation. >> >> And this is the other data frame >> >> structure(list(anon_ID = c("exampleID1", "exampleID2", "exampleID3" ), >> maxGrade = c(10, 5, 10), firstGrade = c(10, 5, 8), lastGrade = c(10, >> 5, 10), total_submissions = c(1L, 1L, 3L), Time1 = >> structure(c(1361993741, 1362356090, 1362357401), class = c("POSIXct", >> "POSIXt"), tzone = ""), TimeM = structure(c(1361993741, 1362356090, >> 1362492744), class = c("POSIXct", "POSIXt"), tzone = ""), TimeL = >> structure(c(1361993741, 1362356090, 1362492744), class = c("POSIXct", >> "POSIXt"), tzone = "")), .Names = c("anon_ID", "maxGrade", >> "firstGrade", "lastGrade", "total_submissions", "Time1", "TimeM", >> "TimeL"), row.names = c(NA, 3L), class = "data.frame") >> >> >> But with a lot more observations. >> >> >> What I want to do is to call function (userX, Time1, Time2, Time3) >> for all the user in the second data frame where Time1=Time1, >> Time2=TimeM, Time3=TimeL >> >> >> I hope that is more clear. >> >> >> Thanks a lot for all the help! >> >> >> >> On Sun, Sep 1, 2013 at 11:33 AM, Bert Gunter <gunter.ber...@gene.com> >> wrote: >> >> Oh, another possibility is ?mapply, which I should have pointed out in my >>> previous reply. Sorry. >>> >>> -- Bert >>> >>> >>> On Sun, Sep 1, 2013 at 8:30 AM, Bert Gunter <bgun...@gene.com> wrote: >>> >>> Rui et.al.: >>>> >>>> But apply will not work if the data frame has columns of different >>>> classes/types, as appears to be the case here. Viz, from ?apply: >>>> >>>> "If X is not an array but an object of a class with a non-null dim< >>>> http://127.0.0.1:12824/**help/library/base/help/dim<http://127.0.0.1:12824/help/library/base/help/dim> >>>> > >>>> >>>> value (such as a data frame),apply attempts to coerce it to an array >>>> via >>>> as.matrix if it is two-dimensional (e.g., a data frame) or via >>>> as.array. >>>> " >>>> >>>> Simply looping by rows (via for() ) appears to be the simplest and >>>> probably fastest solution. There are other ways via tapply() and >>>> friends, >>>> but these are also essentially loops and are likely to incur some >>>> additional overhead. >>>> >>>> All assuming I understand what the OP has requested, of course. >>>> >>>> Cheers, >>>> >>>> Bert >>>> >>>> >>>> On Sun, Sep 1, 2013 at 7:31 AM, Rui Barradas <ruipbarra...@sapo.pt >>>> >wrote: >>>> >>>> Hello, >>>>> >>>>> Maybe you need apply, not lapply. It seems you want to apply() a >>>>> function to the first dimension of your data.frame, something like >>>>> >>>>> apply(dat, 1, fun) #apply by rows >>>>> >>>>> >>>>> Hope this helps, >>>>> >>>>> Rui Barradas >>>>> >>>>> Em 01-09-2013 15:00, Ignacio Martinez escreveu: >>>>> >>>>> I have a Data Frame that contains, between other things, the following >>>>>> fields: userX, Time1, Time2, Time3. The number of observations is >>>>>> 2000. >>>>>> >>>>>> I have a function that has as inputs userX, Time1, Time2, Time3 and >>>>>> return >>>>>> a data frame with 1 observation and 19 variables. >>>>>> >>>>>> I want to apply that function to all the observations of the first >>>>>> data >>>>>> frame to make a new data frame with 2000 observations and 19 >>>>>> variables. >>>>>> >>>>>> I thought about using lapply, but if I understand correctly, it only >>>>>> takes >>>>>> one variable. >>>>>> >>>>>> Can somebody point me in the right direction? >>>>>> >>>>>> Thanks! >>>>>> >>>>>> [[alternative HTML version deleted]] >>>>>> >>>>>> ______________________________****________________ >>>>>> R-help@r-project.org mailing list >>>>>> https://stat.ethz.ch/mailman/****listinfo/r-help<https://stat.ethz.ch/mailman/**listinfo/r-help> >>>>>> <https://stat.**ethz.ch/mailman/listinfo/r-**help<https://stat.ethz.ch/mailman/listinfo/r-help> >>>>>> > >>>>>> PLEASE do read the posting guide http://www.R-project.org/** >>>>>> posting-guide.html >>>>>> <http://www.R-project.org/**posting-guide.html<http://www.R-project.org/posting-guide.html> >>>>>> > >>>>>> >>>>>> and provide commented, minimal, self-contained, reproducible code. >>>>>> >>>>>> >>>>>> ______________________________****________________ >>>>> R-help@r-project.org mailing list >>>>> https://stat.ethz.ch/mailman/****listinfo/r-help<https://stat.ethz.ch/mailman/**listinfo/r-help> >>>>> <https://stat.**ethz.ch/mailman/listinfo/r-**help<https://stat.ethz.ch/mailman/listinfo/r-help> >>>>> > >>>>> PLEASE do read the posting guide http://www.R-project.org/** >>>>> posting-guide.html >>>>> <http://www.R-project.org/**posting-guide.html<http://www.R-project.org/posting-guide.html> >>>>> > >>>>> >>>>> and provide commented, minimal, self-contained, reproducible code. >>>>> >>>>> >>>> >>>> >>>> -- >>>> >>>> Bert Gunter >>>> Genentech Nonclinical Biostatistics >>>> >>>> Internal Contact Info: >>>> Phone: 467-7374 >>>> Website: >>>> >>>> http://pharmadevelopment.**roche.com/index/pdb/pdb-** >>>> functional-groups/pdb-**biostatistics/pdb-ncb-home.htm<http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm> >>>> >>>> >>>> >>> >>> >>> -- >>> >>> Bert Gunter >>> Genentech Nonclinical Biostatistics >>> >>> Internal Contact Info: >>> Phone: 467-7374 >>> Website: >>> >>> http://pharmadevelopment.**roche.com/index/pdb/pdb-** >>> functional-groups/pdb-**biostatistics/pdb-ncb-home.htm<http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm> >>> >>> >>> >> [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.