Terry, I recently noticed the censor argument of survfit. For some analyses, setting censor=FALSE greatly reduces the size of the resulting object, which is a nice feature.
However, when combined with the id argument, only 1 prediction is made. Predictions can be made individually but I'd prefer to do them all at once if that change can be made. Chris ##################################### # CODE # create data set.seed(20130625) n <- 100 # sample size x <- rbinom(n, 1, 0.5) # covariate z <- rep(0, n) # start time y <- rexp(n, exp(x)) # event time e <- y < 2 # censor at 2 y <- pmin(y, 2) # observation time dat <- data.frame(x,z,y,e) # fit cox model with start/stop format library(survival) mod <- coxph(Surv(z, y, e)~x, data=dat) summary(mod) # create prediction dataset with 3 individuals with # x = 0 on (0,2) # x = 1 on (0,2) # x = 0 on (0,1) and x = 1 on (1,2) datnew <- data.frame(x=c(0,1,0,1), z=c(0,0,0,1), y=c(2,2,1,2), e=rep(0,4), id=c(1,2,3,3)) datnew # as expected modsf1 <- survfit(mod, newdata=datnew, id=id) modsf1 # not as expected modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE) modsf2 # for comparison modsf3 <- survfit(mod, newdata=datnew[1:2,]) modsf3 # appears to work when individual=FALSE (id not specified) modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE) modsf4 # visually par(mfrow=c(2,2)) plot(modsf1, col=1:3, lty=1:3, conf.int=FALSE) plot(modsf2, col=1:3, lty=1:3, conf.int=FALSE) plot(modsf3, col=1:2, lty=1:2, conf.int=FALSE) plot(modsf4, col=1:2, lty=1:2, conf.int=FALSE) # Can be done individually modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE) modsf2a modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE) modsf2b modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE) modsf2c # one at a time par(mfrow=c(1,1)) plot(modsf2a, col=1, lty=1, conf.int=FALSE) lines(modsf2b, col=2, lty=2, conf.int=FALSE) lines(modsf2c, col=3, lty=3, conf.int=FALSE) ##################################### # OUTPUT > # create data > > set.seed(20130625) > n <- 100 # sample size > x <- rbinom(n, 1, 0.5) # covariate > z <- rep(0, n) # start time > y <- rexp(n, exp(x)) # event time > e <- y < 
2 # censor at 2 > y <- pmin(y, 2) # observation time > dat <- data.frame(x,z,y,e) > # fit cox model with start/stop format > library(survival) > mod <- coxph(Surv(z, y, e)~x, data=dat) > summary(mod) Call: coxph(formula = Surv(z, y, e) ~ x, data = dat) n= 100, number of events= 98 coef exp(coef) se(coef) z Pr(>|z|) x 0.7162 2.0466 0.2091 3.425 0.000614 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 exp(coef) exp(-coef) lower .95 upper .95 x 2.047 0.4886 1.359 3.083 Concordance= 0.601 (se = 0.029 ) Rsquare= 0.109 (max possible= 0.999 ) Likelihood ratio test= 11.58 on 1 df, p=0.0006666 Wald test = 11.73 on 1 df, p=0.0006137 Score (logrank) test = 12.18 on 1 df, p=0.0004831 > # create prediction dataset with 3 individuals with > # x = 0 on (0,2) > # x = 1 on (0,2) > # x = 0 on (0,1) and x = 1 on (1,2) > datnew <- data.fra .... [TRUNCATED] > datnew x z y e id 1 0 0 2 0 1 2 1 0 2 0 2 3 0 0 1 0 3 4 1 1 2 0 3 > # as expected > modsf1 <- survfit(mod, newdata=datnew, id=id) > modsf1 Call: survfit(formula = mod, newdata = datnew, id = id) records n.max n.start events median 0.95LCL 0.95UCL 0 100 100 100 98 0.663 0.457 0.948 <NA> 100 100 100 98 0.333 0.288 0.457 <NA> 100 100 100 98 0.663 0.457 0.948 > # not as expected > modsf2 <- survfit(mod, newdata=datnew, id=id, censor=FALSE) > modsf2 Call: survfit(formula = mod, newdata = datnew, censor = FALSE, id = id) records n.max n.start events median 0.95LCL 0.95UCL 100.000 100.000 100.000 294.000 0.663 0.457 0.948 > # for comparison > modsf3 <- survfit(mod, newdata=datnew[1:2,]) > modsf3 Call: survfit(formula = mod, newdata = datnew[1:2, ]) records n.max n.start events median 0.95LCL 0.95UCL [1,] 100 100 100 98 0.663 0.457 0.948 [2,] 100 100 100 98 0.333 0.288 0.457 > # appears to work when individual=FALSE (id not specified) > modsf4 <- survfit(mod, newdata=datnew[1:2,], censor=FALSE) > modsf4 Call: survfit(formula = mod, newdata = datnew[1:2, ], censor = FALSE) records n.max n.start events median 0.95LCL 
0.95UCL [1,] 100 100 100 98 0.663 0.457 0.948 [2,] 100 100 100 98 0.333 0.288 0.457 > modsf2a <- survfit(mod, newdata=datnew[1,], id=id, censor=FALSE) > modsf2a Call: survfit(formula = mod, newdata = datnew[1, ], censor = FALSE, id = id) records n.max n.start events median 0.95LCL 0.95UCL 100.000 100.000 100.000 98.000 0.663 0.457 0.948 > modsf2b <- survfit(mod, newdata=datnew[2,], id=id, censor=FALSE) > modsf2b Call: survfit(formula = mod, newdata = datnew[2, ], censor = FALSE, id = id) records n.max n.start events median 0.95LCL 0.95UCL 100.000 100.000 100.000 98.000 0.333 0.288 0.457 > modsf2c <- survfit(mod, newdata=datnew[3:4,], id=id, censor=FALSE) > modsf2c Call: survfit(formula = mod, newdata = datnew[3:4, ], censor = FALSE, id = id) records n.max n.start events median 0.95LCL 0.95UCL 100.000 100.000 100.000 98.000 0.663 0.457 0.948 ********************************************************** Electronic Mail is not secure, may not be read every day, and should not be used for urgent or sensitive issues ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.