Not the same data your using but... # get something like a reasonable # age dist for dummy data: data('mw.ages', package='UsingR') mw.age <- mw.ages[2:80,]
# size of dummy dataset n.obs <- 1000 #pr of survival pr.s <- 0.80 # dummy data dat <- data.frame( pclass=sample(c('1st', '2nd', '3rd'), n.obs, T), age=sample(1:79, n.obs, T, apply(mw.age, 1, sum)/sum(mw.age)), sex=sample(c('male', 'female'), n.obs, T, apply(mw.age, 2, sum)/sum(mw.age)), survived=sample(0:1, n.obs, T, c(1-pr.s, pr.s)) ) dat$age.group <- cut(dat$age, seq(0, 80, 10), right=F) #----------------------------------------------------- # get the logits attach(dat) logits <- by(dat, list(age.group, sex, pclass), function(x){ p <- sum(x$survived)/nrow(x) p/(1-p) }) detach(dat) #----------------------------------------------------- #----------------------------------------------------- # the class of the object logits is 'by' - # this function munges it into a data frame # (note that 'as.data.frame' is generic) # ARGS # x an object of class 'by' # out.names names you want to use # in the output data frame. as.data.frame.by <- function(x, out.names=NULL) { nms <- dimnames(x) no.levs <- sapply(nms, length) res <- as.numeric(x) for(i in 1:length(nms)){ res <- data.frame( if(i==1) rep(nms[[i]], times=prod(no.levs[(i+1):length(nms)])) else if(i==length(nms)) rep(nms[[i]], each=prod(no.levs[1:(i-1)])) else rep(nms[[i]], each=prod(no.levs[1:(i-1)]), times=prod(no.levs[(i+1):length(nms)])) , res) } names(res) <- out.names res } as.data.frame(logits, c('age.group', 'sex', 'pclass', 'logit')) #----------------------------------------------------- 2008/10/18 andyer weng <[EMAIL PROTECTED]>: > hi all, > > For my question in the first email below, I found I made a mistake on > my coding in the previous email, the one I was trying to type should > be > >> grouped.titanic.df<-data.frame(group.age.group=sort(unique(titanic.df$age.group)), > + > expand.grid(sex=sort(unique(titanic.df$sex)),pclass=sort(unique(titanic.df$pclass))), > + r=as.vector(tapply(titanic.df$survived,titanic.df$age.group,sum)), > + n=as.vector(tapply(titanic.df$survived,titanic.df$age.group,length))) > > Error in data.frame(group.age.group = sort(unique(titanic.df$age.group)), : > arguments imply differing number of rows: 8, 6 > > > please advise what I have done wrong? why the error message come up. > Am I doing the right thing to fix the question i mentioned in the > first email (the bottom email)? > > Cheers. Andyer > > > ---------- Forwarded message ---------- > From: andyer weng <[EMAIL PROTECTED]> > Date: 2008/10/18 > Subject: Fwd: Categorial Response Questions > To: r-help@r-project.org > > > hi all, > > me again. i try to type the following coding for my question below, > but it comes up a error messgae. please advise whether the way i was > trying to do will solve my question stated in the previous email. If > so , please advise what is wrong with my coding. > (p.s. all the data are stored in xxx.df) > >> grouped.xxx.df<-data.frame(group.age.group=sort(unique(xxx.df$age.group)), > + > expand.grid(sex=c("female","male"),age.group=c("0-9","10-19","20-29","30-39","40-49","50-59","60-69","70-79")), > + r=tapply(xxx.df$survived,titanic.df$age.group,sum), > + n=tapply(xxx.df$survived,titanic.df$age.group,length)) > > Error in data.frame(group.age.group = sort(unique(xxx.df$age.group)), : > arguments imply differing number of rows: 8, 16 > In addition: Warning messages: > 1: In Ops.factor(left) : + not meaningful for factors > 2: In Ops.factor(left) : + not meaningful for factors > > > thanks millions. > > Regards, > Andyer > > > > > > > ---------- Forwarded message ---------- > From: andyer weng <[EMAIL PROTECTED]> > Date: 2008/10/18 > Subject: Fwd: Categorial Response Questions > To: r-help@r-project.org > > > Sorry Guys, i press the wrong button to send out the uncompleted message. > > let me do it again. > > my purpose for below questions is to assess the effect of class, age > and sex on the survival. > > > I have a data set containing : > > pclass: A factor giving the class of the passenger: one of 1st, 2nd, 3rd. > age: The age of the passenger in years. > sex: Passenger's gender: female or male > age.group: Passengers age group, one of 0‐9 , 10‐19, 20‐29, > 30‐39, 40‐49, 50‐59, 60‐69,70‐79 > survived: Passenger's survival (1=survived, 0=did not survive) > > Ignoring the variable age, > - I need to group the data into groups corresponding to each > age‐group/sex/class combination, > - I need to compute the logits for each combination. > - Make a data frame containing the logits, and the categorical > variables. I need to have one line in the data frame for each > combination of the factor levels. > > Can someone please help with the R code for above???!!! > > Thanks millions!! > > Cheers > Andyer. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.