Maybe efficiency is less of an issue than I thought: on 2 million rows it took only a bit over a minute, and my system's memory use only jumped by about 600 MB, so it wasn't much of a strain there either.
> data.test <- matrix( + sample(seq(min(yourdata),max(yourdata)), size = 10000000, replace = TRUE), + ncol = 5) > nrow(data.test) [1] 2000000 > system.time(my.finder(data.test)) user system elapsed 74.54 0.20 75.20 On Fri, Jul 23, 2010 at 11:01 PM, Joshua Wiley <jwiley.ps...@gmail.com> wrote: > Hi, > > Here is a little function that will do what you want and return a nice output: > > #Function To calculate top two values and return > my.finder <- function(mydata) { > my.fun <- function(data) { > strongest <- which.max(data) > secondstrongest <- which.max(data[-strongest]) > strongestantenna <- names(data)[strongest] > secondstrongantenna <- names(data[-strongest])[secondstrongest] > value <- matrix(c(data[strongest], data[secondstrongest], > strongestantenna, secondstrongantenna), ncol =4) > return(value) > } > dat <- apply(mydata, 1, my.fun) > dat <- t(dat) > dat <- as.data.frame(dat, stringsAsFactors = FALSE) > colnames(dat) <- c("strongest", "secondstrongest", > "strongestantenna", "secondstrongantenna") > dat[ , "strongest"] <- as.numeric(dat[ , "strongest"]) > dat[ , "secondstrongest"] <- as.numeric(dat[ , "secondstrongest"]) > return(dat) > } > > > #Using your example data: > > yourdata <- structure(list(value0 = c(-13007L, -12838L, -12880L, -12805L, > -12834L, -11068L, -12807L, -12770L, -12988L, -11779L), value60 = c(-11707L, > -13210L, -11778L, -11653L, -13527L, -11698L, -14068L, -11665L, > -11736L, -12873L), value120 = c(-11072L, -11176L, -11113L, -11071L, > -11067L, -12430L, -11092L, -11061L, -11137L, -12973L), value180 = c(-12471L, > -11799L, -12439L, -12385L, -11638L, -12430L, -11709L, -12373L, > -12570L, -12537L), value240 = c(-12838L, -13210L, -13089L, -11561L, > -13527L, -12430L, -11607L, -11426L, -13467L, -12973L), value300 = c(-13357L, > -13845L, -13880L, -13317L, -13873L, -12814L, -13025L, -12805L, > -13739L, -11146L)), .Names = c("value0", "value60", "value120", > "value180", "value240", "value300"), class = "data.frame", row.names = c("1", > 
"2", "3", "4", "5", "6", "7", "8", "9", "10")) > > my.finder(yourdata) #and what you want is in a nicely labeled data frame > > #A potential problem is that it is not very efficient > > #Here is a test using a matrix of 100,000 rows > #sampled from the same range as your data > #with the same number of columns > > data.test <- matrix( > sample(seq(min(yourdata),max(yourdata)), size = 500000, replace = TRUE), > ncol = 5) > > system.time(my.finder(data.test)) > > #On my system I get > >> system.time(my.finder(data.test)) > user system elapsed > 2.89 0.00 2.89 > > Hope that helps, > > Josh > > > > On Fri, Jul 23, 2010 at 6:20 PM, <mpw...@illinois.edu> wrote: >> I have a data frame with a couple million lines and want to retrieve the >> largest and second largest values in each row, along with the label of the >> column these values are in. For example >> >> row 1 >> strongest=-11072 >> secondstrongest=-11707 >> strongestantenna=value120 >> secondstrongantenna=value60 >> >> Below is the code I am using and a truncated data.frame. Retrieving the >> largest value was easy, but I have been getting errors every way I have >> tried to retrieve the second largest value. I have not even tried to >> retrieve the labels for the value yet. 
>> >> Any help would be appreciated >> Mike >> >> >>> data<-data.frame(value0,value60,value120,value180,value240,value300) >>> data >> value0 value60 value120 value180 value240 value300 >> 1 -13007 -11707 -11072 -12471 -12838 -13357 >> 2 -12838 -13210 -11176 -11799 -13210 -13845 >> 3 -12880 -11778 -11113 -12439 -13089 -13880 >> 4 -12805 -11653 -11071 -12385 -11561 -13317 >> 5 -12834 -13527 -11067 -11638 -13527 -13873 >> 6 -11068 -11698 -12430 -12430 -12430 -12814 >> 7 -12807 -14068 -11092 -11709 -11607 -13025 >> 8 -12770 -11665 -11061 -12373 -11426 -12805 >> 9 -12988 -11736 -11137 -12570 -13467 -13739 >> 10 -11779 -12873 -12973 -12537 -12973 -11146 >>> #largest value in the row >>> strongest<-apply(data,1,max) >>> >>> >>> #second largest value in the row >>> n<-function(data)(1/(min(1/(data[1,]-max(data[1,]))))+ (max(data[1,]))) >>> secondstrongest<-apply(data,1,n) >> Error in data[1, ] : incorrect number of dimensions >>> >> >> ______________________________________________ >> R-help@r-project.org mailing list >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. >> > > > > -- > Joshua Wiley > Ph.D. Student, Health Psychology > University of California, Los Angeles > http://www.joshuawiley.com/ > -- Joshua Wiley Ph.D. Student, Health Psychology University of California, Los Angeles http://www.joshuawiley.com/ ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.