Hi Sarah, thank you very much for the response.
In fact, it does work on Mac even without including the space: > Symbol <- "GOOG" > require(XML) Loading required package: XML > URL <- paste("http://earnings.com/company.asp?client=cb&ticker=", Symbol, > sep="") > x <- readHTMLTable(URL, stringsAsFactors=FALSE) > table.loc <- tail(grep("Earnings Releases", x), 1) + 1 > if (identical(numeric(0), table.loc)) return(NULL) > rdata <- x[[table.loc]] > header <- rdata[1, ] > rdata <- rdata[-1, ] > colnames(rdata) <- header > #format ticker column > rdata[, 1] <- gsub("\r\n\t\t\t", "", rdata[, 1]) > rdata <- na.omit(rdata) > > any(is.na(rdata)) [1] FALSE > rdata[rdata == "n/a"] <- NA > any(is.na(rdata)) [1] TRUE Garrett On Fri, Feb 3, 2012 at 9:57 AM, Sarah Goslee <sarah.gos...@gmail.com> wrote: > Is that exactly what you're doing, in a clean session? > > x <- rdata[27, 4] > >> x == "n/a " > [1] TRUE >> x == "n/a" > [1] FALSE > > Because as long as the space is included, the test should be TRUE. > > (I renamed the dput object rdata, because df() is a base function.) > > df[df == "n/a"] <- NA > shouldn't work on Mac, or any other system, because no elements of > your data frame are "n/a", but are instead "n/a " > > If it were my data, I'd get rid of the spaces at the end of the values before > trying to do anything, either before reading it into R, or with gsub() after. > > Sarah > > On Fri, Feb 3, 2012 at 10:25 AM, G See <gsee...@gmail.com> wrote: >> I have a data.frame named "df". The dput of df is at the bottom of this >> e-mail. >> What I'd like to do is replace the "n/a " values with NA. On Mac OSX, it >> works >> to do this: >> df[df == "n/a"] <- NA >> >> However, it does not work on Ubuntu. See below. 
>> >> Thanks in advance, >> Garrett >> >>> x <- df[27, 4] # complete data.frame dput is below >>> dput(x) >> "n/a " >>> x == "n/a " >> [1] FALSE >>> x == "n/a" >> [1] FALSE >>> str(x) >> chr "n/a " >>> is.na(x) >> [1] FALSE >>> grep("n/a ", x) >> integer(0) >>> grep("n/a", x) >> [1] 1 >> >> >>> sessionInfo() >> R version 2.14.1 (2011-12-22) >> Platform: x86_64-pc-linux-gnu (64-bit) >> >> locale: >> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C >> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 >> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 >> [7] LC_PAPER=C LC_NAME=C >> [9] LC_ADDRESS=C LC_TELEPHONE=C >> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C >> >> attached base packages: >> [1] stats graphics grDevices utils datasets methods base >> >> other attached packages: >> [1] XML_3.4-3 qmao_1.1.10 >> [3] FinancialInstrument_0.10.9 quantmod_0.3-17 >> [5] TTR_0.21-0 Defaults_1.1-1 >> [7] xts_0.8-3 zoo_1.7-6 >> >> loaded via a namespace (and not attached): >> [1] grid_2.14.1 lattice_0.20-0 tools_2.14.1 >>> >> >> >> ### More detail ### >> ## Here is the complete data.frame >>> dput(df) >> structure(list(SYMBOL = c("GOOG ", "GOOG ", "GOOG ", "GOOG ", >> "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", >> "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", >> "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG ", >> "GOOG ", "GOOG ", "GOOG ", "GOOG ", "GOOG "), PERIOD = c("Q4 2011", >> "Q3 2011", "Q2 2011", "Q1 2011", "Q4 2010", "Q3 2010", "Q2 2010", >> "Q1 2010", "Q4 2009", "Q3 2009", "Q2 2009", "Q1 2009", "Q4 2008", >> "Q3 2008", "Q2 2008", "Q1 2008", "Q4 2007", "Q3 2007", "Q2 2007", >> "Q1 2007", "Q4 2006", "Q3 2006", "Q2 2006", "Q1 2006", "Q4 2005", >> "Q3 2005", "Q2 2005", "Q1 2005", "Q4 2004", "Q3 2004"), >> `EVENT TITLE` = c("Q4 2011 Google Earnings Release", "Q3 2011 >> Google Inc Earnings Release", >> "Q2 2011 Google Inc Earnings Release", "Q1 2011 Google Inc >> Earnings Release", >> "Q4 2010 Google Earnings Release", "Q3 
2010 Google Earnings Release", >> "Q2 2010 Google Earnings Release", "Q1 2010 Google Earnings Release", >> "Q4 2009 Google Earnings Release", "Q3 2009 Google Earnings Release", >> "Q2 2009 Google Earnings Release", "Q1 2009 Google Earnings Release", >> "Q4 2008 Google Earnings Release", "Q3 2008 Google Earnings Release", >> "Q2 2008 Google Earnings Release", "Q1 2008 Google Earnings Release", >> "Q4 2007 Google Earnings Release", "Q3 2007 Google Earnings Release", >> "Q2 2007 Google Earnings Release", "Q1 2007 Google Earnings Release", >> "Q4 2006 Google Earnings Release", "Q3 2006 Google Earnings Release", >> "Q2 2006 Google Earnings Release", "Q1 2006 Google Earnings Release", >> "Q4 2005 Google Earnings Release", "Q3 2005 Google Earnings Release", >> "Q2 2005 Google Earnings Release", "Q1 2005 Google Earnings Release", >> "Q4 2004 Google Earnings Release", "Q3 2004 Google Earnings Release" >> ), `EPS ESTIMATE` = c("$ 10.49 ", "$ 8.74 ", "$ 7.85 ", >> "$ 8.10 ", "$ 8.09 ", "$ 6.68 ", "$ 6.52 ", "$ 6.60 ", >> "$ 6.50 ", "$ 5.42 ", "$ 5.09 ", "$ 4.93 ", "$ 4.95 ", >> "$ 4.76 ", "$ 4.74 ", "$ 4.52 ", "$ 4.44 ", "$ 3.78 ", >> "$ 3.59 ", "$ 3.30 ", "$ 2.92 ", "$ 2.42 ", "$ 2.22 ", >> "$ 1.97 ", "n/a ", "n/a ", "n/a ", "n/a ", "n/a ", >> "n/a "), `EPS ACTUAL` = c("$ 9.50 ", "$ 9.72 ", "$ 8.74 ", >> "$ 8.08 ", "$ 8.75 ", "$ 7.64 ", "$ 6.45 ", "$ 6.76 ", >> "$ 6.79 ", "$ 5.89 ", "$ 5.36 ", "$ 5.16 ", "$ 5.10 ", >> "$ 4.92 ", "$ 4.63 ", "$ 4.84 ", "$ 4.43 ", "$ 3.91 ", >> "$ 3.56 ", "$ 3.68 ", "$ 3.18 ", "$ 2.62 ", "$ 2.49 ", >> "$ 2.29 ", "n/a ", "n/a ", "n/a ", "n/a ", "n/a ", >> "n/a "), `PREV. 
YEAR ACTUAL` = c("$ 8.75 ", "$ 7.64 ", >> "$ 6.45 ", "$ 6.76 ", "$ 6.79 ", "$ 5.89 ", "$ 5.36 ", >> "$ 5.16 ", "$ 5.10 ", "$ 4.92 ", "$ 4.63 ", "$ 4.84 ", >> "$ 4.43 ", "$ 3.91 ", "$ 3.56 ", "$ 3.68 ", "$ 3.18 ", >> "$ 2.62 ", "$ 2.49 ", "$ 2.29 ", "n/a ", "n/a ", "n/a ", >> "n/a ", "n/a ", "n/a ", "n/a ", "n/a ", "n/a ", "n/a " >> ), TIME = c("2012-01-19 15:15:00 CST", "2011-10-13 15:15:00 CDT", >> "2011-07-14 15:15:00 CDT", "2011-04-14 15:15:00 CDT", "2011-01-20 >> 15:15:00 CST", >> "2010-10-14 15:15:00 CDT", "2010-07-15 15:15:00 CDT", "2010-04-15 >> 15:15:00 CDT", >> "2010-01-21 15:15:00 CST", "2009-10-15 15:15:00 CDT", "2009-07-16 >> 15:15:00 CDT", >> "2009-04-16 15:15:00 CDT", "2009-01-22 15:15:00 CST", "2008-10-16 >> 15:15:00 CDT", >> "2008-07-17 15:15:00 CDT", "2008-04-17 15:15:00 CDT", "2008-01-31 >> 15:15:00 CST", >> "2007-10-18 15:15:00 CDT", "2007-07-19 15:15:00 CDT", "2007-04-19 >> 15:15:00 CDT", >> "2007-01-31 15:15:00 CST", "2006-10-19 15:15:00 CDT", "2006-07-20 >> 15:15:00 CDT", >> "2006-04-20 15:15:00 CDT", "2006-01-31 15:15:00 CST", "2005-10-20 >> 15:15:00 CDT", >> "2005-07-21 15:15:00 CDT", "2005-04-21 15:15:00 CDT", "2005-02-01 >> 15:15:00 CST", >> "2004-10-21 15:15:00 CDT")), .Names = c("SYMBOL", "PERIOD", >> "EVENT TITLE", "EPS ESTIMATE", "EPS ACTUAL", "PREV. YEAR ACTUAL", >> "TIME"), row.names = 2:31, na.action = structure(31L, .Names = "32", >> class = "omit"), class = "data.frame") >> > > > -- > Sarah Goslee > http://www.functionaldiversity.org ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.