Ahh, I was specifying the second argument FALSE incorrectly. Works now as: mydata$FNAME_SUSPECT <- ifelse(mydata$FNAME_TOKEN_COUNT > 3, TRUE, ifelse(mydata$FNAME_LENGTH > 35, TRUE, ifelse(regexpr("9", mydata$FNAME_PATTERN) > 0, TRUE, FALSE ) ) )
On Tue, Feb 18, 2014 at 10:21 AM, Jeff Johnson <mrjeffto...@gmail.com>wrote: > This is my first time with ifelse, but I've tried: > > mydata$FNAME_SUSPECT <- ifelse(mydata$FNAME_TOKEN_COUNT > 3, TRUE, FALSE, > ifelse(mydata$FNAME_LENGTH > 35, TRUE, FALSE, > ifelse(regexpr("9", mydata$FNAME_PATTERN) > 0, TRUE, > FALSE > ) > ) > ) > > Error in ifelse(mydata$FNAME_TOKEN_COUNT > 3, TRUE, FALSE, > ifelse(mydata$FNAME_LENGTH > : > unused argument (ifelse(mydata$FNAME_LENGTH > 35, TRUE, FALSE, > ifelse(regexpr("9", mydata$FNAME_PATTERN) > 0, TRUE, FALSE))) > > I have the R for Dummies book which covers it a bit, but I just ordered > the R Cookbook. > > > On Tue, Feb 18, 2014 at 10:16 AM, David Carlson <dcarl...@tamu.edu> wrote: > >> Not always true, but it is in this case: >> >> ?ifelse >> >> David C >> >> -----Original Message----- >> From: r-help-boun...@r-project.org >> [mailto:r-help-boun...@r-project.org] On Behalf Of Jeff Johnson >> Sent: Tuesday, February 18, 2014 11:24 AM >> To: R help >> Subject: [R] Updating a data frame based on if condition >> >> I have a subset of data that I have identified as "suspect" (for >> example, >> the first name has excessive spaces, is longer than 35 >> characters or has a >> number). >> >> What I want to do is update the FNAME_SUSPECT field in "mydata" >> to TRUE if >> any of those conditions are met. 
>> >> Here's my data: >> > dput(mydata) >> structure(list(PERSON_FIRST_NAME = c("1298530", "JULIA, TAYLOR, >> CS AND >> JEFF", >> "88", "4465891170098562", "1124211", "LEWIS & MARY KAY", "KARL R >> O S", >> "5466181820076010", "JULI0 C", "WAYNE T.", "1124211", >> "1124211", >> "ROBERT B & VIONA D", "DENNIS and MARY SUE", "BRIAN JOANNE", >> "1124211", "RONALD and GAIL", "Mike and Mary Lou", "31763006", >> "7", "11460735", "Paul and Mary Beth", "JIMMY and RUTH MARIE", >> "1124211", "WAYNE & LU ANN", "SCOTT & ANNA MARIE", "1124211", >> "1124211", "952714", "DAVID, RHONDA and NATALIE", "VIRGINIA >> S", >> "707069", "4397836190001917", "MARIA DE LA LUZ", "MARIA DE LA >> LUZ", >> "G & S COMPUTERIZED GRADING", "1124211", "1124211", "1124211", >> "1124211", "MARIA DE LA LUZ", "ED AND JANICE KISHI", "1124211", >> "Garrett A. and Jenny E.", "1124211", "1124211", "Hiram T. and >> A. Judith", >> "MA DE LA LUZ", "STEVE, Bev, and Caleb", "MR AND MRS EVER"), >> FNAME_SUSPECT = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, >> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, >> FALSE, >> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, >> FALSE, >> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, >> FALSE, >> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, >> FALSE, >> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), >> FNAME_LENGTH = c(7L, 26L, 2L, 16L, 7L, 16L, 10L, 16L, 7L, >> 10L, 7L, 7L, 18L, 19L, 14L, 7L, 16L, 17L, 8L, 1L, 8L, 18L, >> 20L, 7L, 14L, 18L, 7L, 7L, 6L, 25L, 12L, 6L, 16L, 15L, 15L, >> 26L, 7L, 7L, 7L, 7L, 15L, 19L, 7L, 23L, 7L, 7L, 22L, 12L, >> 21L, 15L), FNAME_PATTERN = c("9999999", >> "AAAAA,_AAAAAA,_AA_AAA_AAAA", >> "99", "9999999999999999", "9999999", "AAAAA_&_AAAA_AAA", >> "AAAA_A_A_A", "9999999999999999", "AAAA9_A", "AAAAA___A.", >> "9999999", "9999999", "AAAAAA_A_&_AAAAA_A", >> "AAAAAA_AAA_AAAA_AAA", >> "AAAAA___AAAAAA", "9999999", "AAAAAA_AAA__AAAA", >> "AAAA_AAA_AAAA_AAA", >> "99999999", "9", "99999999", 
"AAAA_AAA_AAAA_AAAA", >> "AAAAA_AAA_AAAA_AAAAA", >> "9999999", "AAAAA_&_AA_AAA", "AAAAA_&_AAAA_AAAAA", >> "9999999", >> "9999999", "999999", "AAAAA,_AAAAAA_AAA_AAAAAAA", >> "AAAAAAAA___A", >> "999999", "9999999999999999", "AAAAA_AA_AA_AAA", >> "AAAAA_AA_AA_AAA", >> "A_&_A_AAAAAAAAAAAA_AAAAAAA", "9999999", "9999999", >> "9999999", >> "9999999", "AAAAA_AA_AA_AAA", "AA_AAA_AAAAAA_AAAAA", >> "9999999", >> "AAAAAAA_A._AAA_AAAAA_A.", "9999999", "9999999", >> "AAAAA_A._AAA_A._AAAAAA", >> "AA_AA_AA_AAA", "AAAAA,_AAA,_AAA_AAAAA", "AA_AAA_AAA_AAAA" >> ), FNAME_TOKEN_COUNT = c(1L, 5L, 1L, 1L, 1L, 4L, 4L, 1L, >> 2L, 4L, 1L, 1L, 5L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, >> 1L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 4L, 5L, 1L, 1L, >> 1L, 1L, 4L, 4L, 1L, 5L, 1L, 1L, 5L, 4L, 4L, 4L)), .Names = >> c("PERSON_FIRST_NAME", >> "FNAME_SUSPECT", "FNAME_LENGTH", "FNAME_PATTERN", >> "FNAME_TOKEN_COUNT" >> ), row.names = c(6717L, 11035L, 11626L, 14965L, 17874L, 24341L, >> 25582L, 25834L, 26851L, 30134L, 36385L, 45244L, 46947L, 61449L, >> 67564L, 71465L, 73782L, 75278L, 78977L, 79037L, 80577L, 81644L, >> 84427L, 86286L, 89963L, 91208L, 94054L, 99518L, 114658L, >> 128305L, >> 129082L, 137492L, 137573L, 138556L, 139489L, 148757L, 153956L, >> 155546L, 160533L, 162386L, 162681L, 165220L, 168063L, 173003L, >> 175322L, 179935L, 180991L, 181215L, 183787L, 184573L), class = >> "data.frame") >> >> Note I defaulted all of the FNAME_SUSPECT to FALSE. I plan to >> change that >> later. >> >> I've tried running this: >> if(mydata$FNAME_TOKEN_COUNT > 3 | mydata$FNAME_LENGTH > 35 | >> regexpr("9", >> mydata$FNAME_PATTERN) > 0) >> mydata$FNAME_SUSPECT <- TRUE >> >> however I get the error: >> Warning message: >> In if (mydata$FNAME_TOKEN_COUNT > 3 | mydata$FNAME_LENGTH > 35 | >> : >> the condition has length > 1 and only the first element will >> be used >> >> Would I be better doing this in a for loop? I had once heard >> that if you're >> doing a for loop in R, you're doing something wrong. 
>> -- >> Jeff >> >> [[alternative HTML version deleted]] >> >> ______________________________________________ >> R-help@r-project.org mailing list >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide >> http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible >> code. >> >> > > > -- > Jeff > -- Jeff [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.