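Hi, x and y are being picked up from your global environment, not from the x and y columns in dataset, so after a row is removed the fitted values still have 20 rows while dataset has 19. Pass the column names as strings and build the formula from them; here is a version that seems to work: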
# Iteratively remove regression outliers based on studentized residuals.
#
# Fits `var1 ~ var2` on `dataset`, stores the fitted values in
# `dataset$varpredicted` and the studentized residuals in `dataset$varstdres`,
# prints and drops every row whose studentized residual is above 3 or below -3,
# and refits until no such row remains.
#
# Arguments:
#   dataset  data frame holding the model variables.
#   var1     character string: left-hand side of the formula (response).
#   var2     character string: right-hand side of the formula (predictor).
#
# Returns the data frame with the outlying rows removed and the columns
# `varpredicted` and `varstdres` taken from the final fit.
rm.outliers <- function(dataset, var1, var2) {
  # Build the formula from the names so that lm() resolves the variables
  # inside `dataset` rather than in the caller's global environment.
  model_formula <- as.formula(paste(var1, var2, sep = " ~ "))

  repeat {
    # Fit once per pass and reuse the fit for both derived columns.
    fit <- lm(model_formula, data = dataset)
    dataset$varpredicted <- predict(fit)
    dataset$varstdres <- rstudent(fit)

    # which() yields the row positions of the outliers (and ignores NA/NaN);
    # counting the logical vector itself would always give nrow(dataset) and
    # the loop would never terminate.
    removed <- which(dataset$varstdres > 3 | dataset$varstdres < -3)
    if (length(removed) == 0L) {
      break
    }

    print(dataset[removed, ])
    dataset <- dataset[-removed, ]
  }

  # Hand the cleaned data back to the caller; without this the work is lost.
  dataset
}

# Best,
# Ista
#
# On Wed, Jan 26, 2011 at 11:36 AM, kirtau <kir...@live.com> wrote:
# >
# > Hi,
# >
# > I have a few lines of code that will remove outliers for a regression test
# > based on the studentized residuals being above or below 3, -3. I have to do
# > this multiple times and have attempted to create a function to lessen the
# > amount of copying, pasting and replacing.
# >
# > I run into trouble with the function and receiving the error "Error in
# > `$<-.data.frame`(`*tmp*`, "varpredicted", value = c(0.114285714285714, :
# > replacement has 20 rows, data has 19
# > "
# >
# > any help would be appreciated. a list of code is listed below.
# >
# > Thank you for your time!
> > x = c(1:20) > y = c(1,3,4,2,5,6,18,8,10,8,11,13,14,14,15,85,17,19,19,20) > data1 = data.frame(x,y) > > # remove outliers for regression by studentized residuals being greater than > 3 > data1$predicted = predict(lm(data1$y~data1$x)) > data1$stdres = rstudent(lm(data1$y~data1$x)); > i=length(which(data1$stdres>3|data1$stdres< -3)) > while(i >= 1){ > remove<-which(data1$stdres>3|data1$stdres< -3) > print(data1[remove,]) > data1 = data1[-remove,] > data1$predicted = predict(lm(data1$y~data1$x)) > data1$stdres = rstudent(lm(data1$y~data1$x)) > i = with(data1,length(which(stdres>3|stdres< -3))) > } > > # attemp to create a function to perfom same idea as above > rm.outliers = function(dataset,var1, var2) { > > dataset$varpredicted = predict(lm(var1~var2)) > dataset$varstdres = rstudent(lm(var1~var2)) > i = length(which(dataset$varstdres > 3 | dataset$varstdres < -3)) > while(i >= 1){ > removed = which(dataset$varstdres > 3 | dataset$varstdres < -3) > print(dataset[removed,]) > dataset = dataset[-removed,] > dataset$varpredicted = predict(lm(var1~var2)) > dataset$varstdres = rstudent(lm(var1~var2)) > i = with(dataset,length(varstdres > 3 | varstdres < -3)) > } > } > -- > View this message in context: > http://r.789695.n4.nabble.com/removing-outlier-function-dataset-update-tp3238394p3238394.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. 
> -- Ista Zahn Graduate student University of Rochester Department of Clinical and Social Psychology http://yourpsyche.org ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.