Hi Jeff, I found some difference in results between your function and mine. It also point out a mistake in my code. In the original post, it says: """""""""""
I need to write a loop to march down the rows, and if there are 2 rows in "Count" where there is only 1 NA row between them, sum the two values up and print only one row with the summed Count value and the Position value that corresponds to the larger Count value, thus making the three rows into one. """"""""" Sorry, I read it incorrectly the last time and selected the maximum "Position" value instead of that corresponds to the larger Count value. After correcting the function, there is still some difference between the results. ##fun1() and fun2() corrected fun1 <- function(dat,n) { rl <- rle(is.na(dat[,"Count"])) indx <- which(is.na(dat[,"Count"]))[rep(rl$lengths[rl$values],rl$lengths[rl$values])==n] lst1 <- lapply(split(indx,((seq_along(indx)-1)%/%n)+1),function(x) { x1 <- dat[c(min(x)-1L,x,max(x)+1L),] x2 <- x1[!is.na(x1$Count),] datN <- data.frame(Position=x2$Position[x2$Count %in% max(x2$Count)],Count=sum(x2$Count)) rowN <- row.names(x2)[x2$Count %in% max(x2$Count)] row.names(datN) <- if(length(rowN)>1) rowN[1] else rowN datN }) names(lst1) <- NULL lst1 <- lst1[!duplicated(sapply(lst1,row.names))] ######added dat2 <- do.call(rbind,lst1) indx2 <- sort(unlist(lapply(split(indx,((seq_along(indx)-1)%/%n)+1),function(x) c(min(x)-1L,x,c(max(x)+1L))),use.names=FALSE)) dat1New <- dat[-indx2[!indx2 %in% row.names(dat2)],] dat1New[match(row.names(dat2),row.names(dat1New)),] <- dat2 row.names(dat1New) <- 1:nrow(dat1New) dat1New } ################################## fun2 <- function(dat,n){ indx <- cumsum(c(1,abs(diff(is.na(dat[,"Count"]))))) indx1 <- indx[is.na(dat[,"Count"])] names(indx1) <- which(is.na(dat[,"Count"])) indx2 <- indx1[indx1 %in% names(table(indx1))[table(indx1)==n]] lst1 <- tapply(seq_along(indx2),list(indx2),FUN=function(i) { x1 <- indx2[i] x2 <- as.numeric(names(x1)) x3 <- dat[c(min(x2)-1L,x2,max(x2)+1L),] x4 <- subset(x3, !is.na(Count)) x5 <- data.frame(Position=x4$Position[x4$Count %in% max(x4$Count)],Count=sum(x4$Count)) ind <- x4$Count %in% max(x4$Count) row.names(x5) <- if(sum(ind)>1) row.names(x4)[ind][1] else row.names(x4)[ind] x5 }) attr(lst1,"dimnames") <- NULL dat2 <- do.call(rbind,lst1) indx3 <- sort(unlist(tapply(seq_along(indx2),list(indx2),FUN=function(i) {x1 <- indx2[i] x2 <- as.numeric(names(x1)) c(min(x2)-1L, x2, max(x2)+1L)}),use.names=FALSE)) dat$id <- 1:nrow(dat) dat2$id <- as.numeric(row.names(dat2)) library(plyr) res <- join(dat,dat2[,-1],by="id",type="left") res1 <- res[!((row.names(res) %in% indx3) & is.na(res[,4])),] res1[,2][!is.na(res1[,4])] <- res1[,4][!is.na(res1[,4])] res2 <- res1[,1:2] row.names(res2) <- 1:nrow(res2) res2 } dat1 <- structure(list(Position = c(15L, 22L, 38L, 49L, 55L, 61L, 62L, 14L, 29L, 63L, 46L, 22L, 18L, 24L, 22L, 49L, 42L, 38L, 29L, 22L, 29L, 23L, 42L), Count = c(15L, NA, NA, 5L, NA, 17L, 18L, NA, NA, NA, 8L, NA, 20L, NA, NA, 16L, 19L, NA, NA, NA, 13L, NA, 33L )), .Names = c("Position", "Count"), class = "data.frame", row.names = c(NA, -23L)) fun1(dat1,1) Position Count 1 15 15 2 22 NA 3 38 NA 4 61 22 5 62 18 6 14 NA 7 29 NA 8 63 NA 9 18 28 ### 10 24 NA 11 22 NA 12 49 16 #### 13 42 19 14 38 NA 15 29 NA 16 22 NA 17 42 46 removeNNAs(dat1,1) #gets similar results #but, fun1(fun1(dat1,1),2) Position Count 1 61 37 2 62 18 3 14 NA 4 29 NA 5 63 NA 6 18 44 #######different 7 42 19 8 38 NA 9 29 NA 10 22 NA 11 42 46 removeNNAs(dat1,2,lessOrEqual=TRUE) Position Count 6 61 37 7 62 18 8 14 NA 9 29 NA 10 63 NA 16 49 44 ###### different 17 42 19 18 38 NA 19 29 NA 20 22 NA 23 42 46 > removeNNAs(dat1,3,lessOrEqual=TRUE) Position Count 6 61 37 16 49 62 23 42 65 fun1(fun1(fun1(dat1,1),2),3) Position Count 1 61 37 2 18 62 3 42 65 A.K. On Sunday, October 20, 2013 7:49 PM, Jeff Newmiller <jdnew...@dcn.davis.ca.us> wrote: Looks like a right parenthesis was dropped. Corrected: removeNNAs <- function( dat, N, lessOrEqual=FALSE ) { N1 <- N+1 rx <- rle( !is.na( dat$Count ) ) # indexes of the ends of each run of NAs or non-NAs cs <- cumsum( rx$lengths ) # indexes of the ends of runs of NAs or non-NAs cs2 <- cs[ !rx$values ] # If the first Count is NA, then drop first run of NAs if ( !rx$values[1] ) { cs2 <- cs2[ -1 ] } # If the last Count is NA, then drop last run of NAs if ( !rx$values[ length( rx$values ) ] ) { cs2 <- cs2[ -length( cs2 ) ] } # cs2 is indexes of rows to potentially receive deleted Counts # after collapse cs2 <- cs2 + 1 # cs1 is indexes of non-NA Counts to be deleted cs1 <- cs[ rx$values ][ seq.int( length( cs2 ) ) ] # identify the indexes of the Count values before the strings # of NAs that meet the criteria if ( lessOrEqual ) { idx0 <- N1 >= ( cs2 - cs1 ) } else { idx0 <- N1 == ( cs2 - cs1 ) } idx1 <- cs1[ idx0 ] # identify the indexes of the Count values after the strings of # NAs that meet the criteria idx2 <- cs2[ idx0 ] # Identify which indexes are both sources and destinations idx1c <-c( idx2[ -length( idx2 ) ] == idx1[ -1 ], FALSE ) # identify groups of indexes that need to be merged idx1g <- rev( cumsum( rev( !idx1c ) ) ) # find which elements of idx1 represent the beginning of a # sequence of indexes to be replaced (meta-indexes) srcmidxs <- which( -1 == diff( c( idx1g[ 1 ] + 1, idx1g ) ) ) # find which elements of idx2 represent the end of a sequence # to be replaced (meta-indexes) destmidxs <- which( 1 == rev( diff( rev( c( idx1g, 0 ) ) ) ) ) # add counts from before NAs to destination rows result <- dat srcidxList <- vector( mode="list", length=length( destmidxs ) ) for ( i in seq.int( length( destmidxs ) ) ) { # row to which data will be copied destidx <- idx2[ destmidxs[ i ] ] # sequence of indexes of source rows srcidxss <- seq.int( from=idx1[ srcmidxs[ i ] ], to=destidx - 1 ) result[ destidx, "Count" ] <- ( dat[ destidx, "Count" ] + sum( dat[ srcidxss, "Count" ], na.rm=TRUE ) ) # keep a list of indexes to be removed srcidxList[ i ] <- list( srcidxss ) } # remove source rows result <- result[ -unlist( srcidxList ), ] result } ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.