Apologies, I thought I was following exactly that sentence and trying to make a minimal post that would waste as little developer bandwidth as possible given the lack of a better system.
Anyway, I have been using R for like forever (20 years). In my current project, I have run into problems with stack overflows in R's dendrogram code when trying to use either str() or as.hclust() on very deep dendrograms. To duplicate, use this function from tests/reg-tests-1c.R in the R source code: mkDend <- function(n, lab, method = "complete", ## gives *ties* often: rGen = function(n) 1+round(16*abs(rnorm(n)))) { stopifnot(is.numeric(n), length(n) == 1, n >= 1, is.character(lab)) a <- matrix(rGen(n*n), n, n) colnames(a) <- rownames(a) <- paste0(lab, 1:n) .HC. <<- hclust(as.dist(a + t(a)), method=method) as.dendrogram(.HC.) } Get a nasty dendrogram: de <- mkDend(2000, 'x', 'single') 1st bug: sink('somefile.txt'); str(de); sink(); What happens: Error in getOption("OutDec") : node stack overflow Also, the last call to sink() isn't executed because of the error, so you'll need to call sink() after the error to clear the diversion. What should happen: Function completes without a stack overflow. 2nd bug: hh <- as.hclust(de) What happens: Error: C stack usage 7971248 is too close to the limit What should happen: Function completes without a stack overflow. A knowledgeable user might be able to increase R's limits to avoid these errors on this particular dendrogram, but a) my users aren't that knowledgeable about R and this is expected to be a common problem, and b) there will be bigger dendrograms (up to at least 25000 leaves). Please see attached patch for non-recursive implementations. Regards, Bradley On Mon, Mar 6, 2017 at 3:50 AM, Martin Maechler <maech...@stat.math.ethz.ch> wrote: > >>>>> Bradley Broom <bmbr...@gmail.com> > >>>>> on Sun, 5 Mar 2017 16:03:30 -0600 writes: > > > Please add me to R bugzilla. Thanks, Bradley > > Well, I will not do it just like that (mean "after such a > minimal message"). > > I don't see any evidence as to your credentials, knowledge of R, > etc, as part of this request. We are all professionals, > devoting part of our (work and free) time to the R project > (rather than employees of the company you paid to serve you ...) > > It may be that you have read https://www.r-project.org/bugs.html > > Notably this part > > --> NOTE: due to abuse by spammers, since 2016-07-09 only users who have > previously submitted bugs can submit new ones on R’s Bugzilla. We’re > working on a better system… In the mean time, post (e-mail) to R-devel or > ask an R Core member to add you manually to R’s Bugzilla members. > > The last sentence was *meant* to say you should post (possibly > parts, ideally a minimal reproducible example of) your bug > report to R-devel so others could comment on it, agree or > disagree with your assessment etc, > __or__ ask an R-core member to add you to bugzilla (if you really read the > other parts of the 'R bugs' web page above). > > Posting to all 1000 R-devel readers with no content about what > you consider a bug is a waste of bandwidth for at least 99% of > these readers. > > [Yes, I'm also using their time ... in the hope to *improve* the > quality of future such postings]. > > Martin Maechler > ETH Zurich >
Index: src/library/stats/R/dendrogram.R =================================================================== --- src/library/stats/R/dendrogram.R (revision 72314) +++ src/library/stats/R/dendrogram.R (working copy) @@ -81,60 +81,130 @@ structure(z[[as.character(k)]], class = "dendrogram") } +# Count the number of leaves in a dendrogram. +nleaves <- function (node) { + if (is.leaf(node)) { return (1L) } + todo <- NULL # Non-leaf nodes to traverse after this one. + count <- 0L + repeat { + # For each child: count iff a leaf, add to todo list otherwise. + while (length(node)) { + child <- node[[1L]]; + node <- node[-1L]; + if (is.leaf(child)) { + count <- count + 1L + } else { + todo <- list(node=child, todo=todo) + } + } + # Advance to next node, terminating when no nodes left to count. + if (is.null(todo)) { + break + } else { + node <- todo$node + todo <- todo$todo + } + } + return (count) +} + ## Reversing the above (as much as possible) ## is only possible for dendrograms with *binary* splits as.hclust.dendrogram <- function(x, ...) { - stopifnot(is.list(x), length(x) == 2) - n <- length(ord <- as.integer(unlist(x))) + stopifnot(is.list(x), length(x) == 2L) + n <- nleaves(x) + stopifnot(n == attr(x, "members")) + + # Ord and labels for each leaf node (in preorder). + ord <- integer(n) + labsu <- character(n) + + # Height and (parent,index) for each internal node (in preorder). + n.h <- n - 1L + height <- numeric(n.h) + myIdx <- matrix(NA_integer_, 2L, n.h) + + # Record merges initially in preorder traversal + # We will resort into merge order at end. + merge <- matrix(NA_integer_, 2L, n.h) + + # Starting at root, traverse dendrogram recording + # information above about leaves and nodes encountered + position <- 0L # position within current node + stack <- NULL # parents of current node plus saved state + leafCount <- 0L # number of leaves seen + nodeCount <- 0L # number of nodes seen + repeat { + # Pre-order traversal of the current node. + # Will descend into non-leaf children pushing parents onto stack. + while (length(x)) { + # Record height and index list on first visit to each internal node. + if (position == 0L) { + nodeCount <- nodeCount + 1L + myNodeIndex <- nodeCount + if (nodeCount != 1L) { + myIdx[,nodeCount] <- c(stack$position, stack$myNodeIndex) + } + height[nodeCount] <- attr(x, "height") + } + position <- position + 1L + child <- x[[1L]] + x <- x[-1L] + if (is.leaf(child)) { + # Record information about leaf nodes. + leafCount <- leafCount + 1L + labsu[leafCount] <- attr(child,'label') + ord[leafCount] <- as.integer(child) + merge[position,myNodeIndex] <- - ord[leafCount] + } else { + stopifnot (length(child)==2L) + # Descend into non-leaf nodes, saving state on stack. + stack <- list (node=x, position=position, myNodeIndex=myNodeIndex, stack=stack) + x <- child + position <- 0L + } + } + # All children of current node have been traversed. + + # Terminate if current node was the root node. + if (is.null(stack)) { + break + } + + # Otherwise, pop parent node and state. + position <- stack$position # Restore position in parent node. + x <- stack$node + myNodeIndex <- stack$myNodeIndex + stack <- stack$stack + } + iOrd <- sort.list(ord) if(!identical(ord[iOrd], seq_len(n))) stop(gettextf( "dendrogram entries must be 1,2,..,%d (in any order), to be coercible to \"hclust\"", n), domain=NA) - stopifnot(n == attr(x, "members")) - n.h <- n - 1L - ## labels: not sure, if we'll use this; there should be a faster way! - labsu <- unlist(labels(x)) - labs <- labsu[iOrd] - x <- .add.dendrInd(x) - SIMP <- function(d) { - if(is.leaf(d)) { - - as.vector(d)# dropping attributes - } else { - j <<- j + 1L - height[j] <<- attr(d, "height") - inds[[j]] <<- attr(d, ".indx.") - attributes(d) <- NULL # drop all, incl. class - ## recursively apply to components: - d[] <- lapply(d, SIMP) - d - } - } - height <- numeric(n.h); inds <- vector("list",n.h); j <- 0L - xS <- SIMP(x) # -> a simplified version of 'x' [nested list] *plus* 'height' and 'inds' - ## ties: break ties "compatibly" with .add.dendrInd() -- relies on stable sort here: + ## ties: break ties "compatibly" with above preorder traversal -- relies on stable sort here: ii <- sort.list(height, decreasing=TRUE)[n.h:1L] - verbose <- getOption("as.hclust.dendr", FALSE) - merge <- matrix(NA_integer_, 2L, n.h) - for(k in seq_len(n.h)) { - if(verbose) cat(sprintf("ii[k=%2d]=%2d ", k, ii[k])) - s <- if(k < n.h) { - if(length(in.k <- inds[[ ii[k] ]])) - xS[[in.k]] - } else xS - if(verbose) { cat("-> s=xS[[in.k]]="); str(s) } - stopifnot(length(s) == 2L, all( vapply(s, is.integer, NA) ))# checking.. - merge[,k] <- unlist(s) - if(k < n.h) - xS[[in.k]] <- + k + stopifnot (ii[n.h]==1L) + + # Record internal merges + k <- seq_len(n.h-1L) + merge[t(myIdx[,ii[k]])] <- + k + + if (getOption("as.hclust.dendr", FALSE)) { + for(k in seq_len(n.h)) { + cat(sprintf("ii[k=%2d]=%2d ", k, ii[k])) + cat("-> s=merge[[,ii[k]]]=") + str(merge[,ii[k]]) + } } - structure(list(merge = t(merge), - height = height[ii], + structure(list(merge = t(merge[,ii]), # Resort into merge order + height = height[ii], # Resort into merge order order = ord, - labels = labs, + labels = labsu[iOrd], call = match.call(), method = NA_character_, dist.method = NA_character_), @@ -141,24 +211,6 @@ class = "hclust") } -##' Auxiliary for as.hclust.dendrogram() : -##' add the c(i1,i2,..) list indices to each non-leaf of a dendrogram -##' --> allowing "random access" into the dendrogram -.add.dendrInd <- function(x) -{ - add.I <- function(x, ind) { - if(!is.leaf(x)) { - for(i in seq_along(x)) - x[[i]] <- add.I(x[[i]], c(ind, i)) - attr(x, ".indx.") <- ind - } - x - } - ## apply recursively: - add.I(x, integer()) -} - - ### MM: 'FIXME' (2002-05-14): ### ===== ## We currently (mis)use a node's "members" attribute for two things: @@ -248,7 +300,7 @@ str.dendrogram <- function (object, max.level = NA, digits.d = 3L, give.attr = FALSE, - wid = getOption("width"), nest.lev = 0, indent.str = "", + wid = getOption("width"), nest.lev = 0L, indent.str = "", last.str = getOption("str.dendrogram.last"), stem = "--", ...) { ## TO DO: when object is part of a larger structure which is str()ed @@ -263,44 +315,56 @@ paste(paste(names(fl), fl, sep = sep), collapse = ", ") } - ## when indent.str ends in a blank, i.e. "last" (see below) - istr <- sub(" $", last.str, indent.str) - cat(istr, stem, sep = "") + todo <- NULL; # Nodes to process after this one + repeat { + ## when indent.str ends in a blank, i.e. "last" (see below) + istr <- sub(" $", last.str, indent.str) + cat(istr, stem, sep = "") - at <- attributes(object) - memb <- at[["members"]] - hgt <- at[["height"]] - if(!is.leaf(object)) { - le <- length(object) - if(give.attr) { - if(nzchar(at <- pasteLis(at, c("class", "height", "members")))) - at <- paste(",", at) - } - cat("[dendrogram w/ ", le, " branches and ", memb, " members at h = ", - format(hgt, digits = digits.d), if(give.attr) at, - "]", if(!is.na(max.level) && nest.lev == max.level)" ..", "\n", - sep = "") - if (is.na(max.level) || nest.lev < max.level) { - for(i in 1L:le) { - ##cat(indent.str, nam.ob[i], ":", sep = "") - str(object[[i]], nest.lev = nest.lev + 1, - indent.str = paste(indent.str, if(i < le) " |" else " "), - last.str = last.str, stem = stem, - max.level = max.level, digits.d = digits.d, - give.attr = give.attr, wid = wid) + at <- attributes(object) + memb <- at[["members"]] + hgt <- at[["height"]] + if(!is.leaf(object)) { + le <- length(object) + if(give.attr) { + if(nzchar(at <- pasteLis(at, c("class", "height", "members")))) + at <- paste(",", at) } + cat("[dendrogram w/ ", le, " branches and ", memb, " members at h = ", + format(hgt, digits = digits.d), if(give.attr) at, + "]", if(!is.na(max.level) && nest.lev == max.level)" ..", "\n", + sep = "") + if (is.na(max.level) || nest.lev < max.level) { + # Push children onto todo list in reverse order. + # Assumes at least one child. + nest.lev <- nest.lev + 1L; + todo <- list(object=object[[le]], nest.lev = nest.lev, indent.str = paste(indent.str, " "), todo=todo); + indent.str <- paste (indent.str, " |"); + while ((le <- le - 1L) > 0L) { + todo <- list(object=object[[le]], nest.lev = nest.lev, indent.str = indent.str, todo=todo); + } + } + } else { ## leaf + cat("leaf", + if(is.character(at$label)) paste("", at$label,"", sep = '"') else + format(object, digits = digits.d),"") + any.at <- hgt != 0 + if(any.at) cat("(h=",format(hgt, digits = digits.d)) + if(memb != 1) #MM: when can this happen? + cat(if(any.at)", " else {any.at <- TRUE; "("}, "memb= ", memb, sep = "") + at <- pasteLis(at, c("class", "height", "members", "leaf", "label")) + if(any.at || nzchar(at)) cat(if(!any.at)"(", at, ")") + cat("\n") } - } else { ## leaf - cat("leaf", - if(is.character(at$label)) paste("", at$label,"", sep = '"') else - format(object, digits = digits.d),"") - any.at <- hgt != 0 - if(any.at) cat("(h=",format(hgt, digits = digits.d)) - if(memb != 1) #MM: when can this happen? - cat(if(any.at)", " else {any.at <- TRUE; "("}, "memb= ", memb, sep = "") - at <- pasteLis(at, c("class", "height", "members", "leaf", "label")) - if(any.at || nzchar(at)) cat(if(!any.at)"(", at, ")") - cat("\n") + # Advance to next node, if any. + if (is.null(todo)) { + break; + } else { + object <- todo$object; + nest.lev <- todo$nest.lev; + indent.str <- todo$indent.str; + todo <- todo$todo; + } } invisible() }
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel