Ok so this seems to work :)
tmp=rnorm(2000) X.background = 5+tmp Y.background = 5+ (10*tmp+rnorm(2000)) X.specific = 3.5+3*runif(3000) Y.specific = 5+120*runif(3000) X = c(X.background, X.specific) Y = c(Y.background, Y.specific) MINx=range(X)[1] MAXx=range(X)[2] MINy=range(Y)[1] MAXy=range(Y)[2] ## estimates the density for each datapoint nBins=50 my.lims= c(range(X,na.rm=TRUE),range(Y,na.rm=TRUE)) z1 = kde2d(X,Y,n=nBins, lims=my.lims, h= c( (my.lims[2]-my.lims[1])/(nBins/4) , (my.lims[4]-my.lims[3])/(nBins/4) ) ) X.cut = cut(X, seq(z1$x[1], z1$x[nBins],len=(nBins+1) )) Y.cut = cut(Y, seq(z1$y[1], z1$y[nBins],len=(nBins+1) )) xy.cuts = data.frame(X.cut,Y.cut, ord=1:(length(X.cut)) ) density = data.frame( X=rep(factor(levels(X.cut)),rep(nBins) ), Y=rep(factor(levels(Y.cut)), rep(nBins,nBins) ) , Z= as.vector(z1$z)) xy.density = merge( xy.cuts, density, by=c(1,2), sort=FALSE, all.x=TRUE) xy.density = xy.density[order(x=xy.density$ord),] ### Now uses the density as a weight my.loess = loess(Y ~ X, data.frame( X = X, Y = Y), family="symmetric", degree=2, span=0.1, weights= xy.density$Z^3) lo.pred = predict(my.loess, data.frame(X = seq(MINx, MAXx, length=100)), se=TRUE) plot( seq(MINx, MAXx, length=100), lo.pred$fit, lwd=2,col=2, "l") #, ylim=c(0, max(tmp$fit, na.rm=TRUE) ) , col="dark grey") points(X,Y, pch=".", col= grey(abs(my.loess$res)/max(abs(my.loess$res))) ) On 10 March 2012 18:30, Emmanuel Levy <emmanuel.l...@gmail.com> wrote: > Hi, > > I posted a message earlier entitled "How to fit a line through the > "Mountain crest" ..." > > I figured loess is probably the best way, but it seems that the > problem is the robustness of the fit. Below I paste an example to > illustrate the problem: > > tmp=rnorm(2000) > X.background = 5+tmp; Y.background = 5+ (10*tmp+rnorm(2000)) > X.specific = 3.5+3*runif(1000); Y.specific = 5+120*runif(1000) > X = c(X.background, X.specific);Y = c(Y.background, Y.specific) > MINx=range(X)[1];MAXx=range(X)[2] > > my.loess = loess(Y ~ X, data.frame( X = X, Y = Y), > family="symmetric", degree=2, span=0.1) > lo.pred = predict(my.loess, data.frame(X = seq(MINx, MAXx, > length=100)), se=TRUE) > plot( seq(MINx, MAXx, length=100), lo.pred$fit, lwd=2,col=2, "l") > points(X,Y, col= grey(abs(my.loess$res)/max(abs(my.loess$res))) ) > > As you will see, the red line does not follow the "background" signal. > However, when decreasing the "specific" signal to 500 points it > becomes perfect. > > I'm sure there is a way to "tune" the fitting so that it works but I > can't figure out how. Importantly, *I cannot increase the span* > because in reality the relationship I'm looking at is more complex so > I need a small span value to allow for a close fit. > > I foresee that changing the "weigthing" is the way to go but I do not > really understand how the "weight" option is used (I tried to change > it and nothing happened), and also the embedded "tricubic weighting" > does not seem changeable. > > So any idea would be very welcome. > > Emmanuel ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.