You might try this to get the data into a data frame that would be easier to process. You can then subset and split the data frame to look at different combinations.
> str01 <- "2007-10-12 11:50:05 state B. ,2007-10-12 11:50:05 state C. ,2007-10-12 13:23:24 state D. ,2007-10-12 13:23:43 state E. ,2007-10-14 15:43:19 state F. ,2007-10-14 15:43:20 state E. ,2007-10-14 15:43:25 state G. ,2007-10-14 15:43:32 state H. ,2007-10-14 15:43:41 state I. ,2007-10-14 15:43:47 state F. ,2007-10-14 15:43:47 state G. ,2007-10-14 15:48:08 state H. ,2007-10-16 10:10:20 state J. ,2007-10-19 11:12:54 state K ,2007-10-19 11:17:37 state D. ,2007-10-19 11:17:42 state E. ,2007-10-19 11:17:49 state F. ,2007-10-19 11:17:51 state E. ,2007-10-19 11:17:58 state H. ,2007-10-19 11:18:05 state J. ,2007-10-19 11:21:45 state L." > x.1 <- strsplit(str01, '[,]')[[1]] > x.2 <- do.call(rbind, strsplit(x.1, ' ')) > # clean up the state to only letters > x.2[,4] <- sub("([[:alnum:]]+).*", "\\1", x.2[,4]) > states <- data.frame(time=as.POSIXct(paste(x.2[,1], x.2[,2])), state=x.2[,4]) > > > > states time state 1 2007-10-12 11:50:05 B 2 2007-10-12 11:50:05 C 3 2007-10-12 13:23:24 D 4 2007-10-12 13:23:43 E 5 2007-10-14 15:43:19 F 6 2007-10-14 15:43:20 E 7 2007-10-14 15:43:25 G 8 2007-10-14 15:43:32 H 9 2007-10-14 15:43:41 I 10 2007-10-14 15:43:47 F 11 2007-10-14 15:43:47 G 12 2007-10-14 15:48:08 H 13 2007-10-16 10:10:20 J 14 2007-10-19 11:12:54 K 15 2007-10-19 11:17:37 D 16 2007-10-19 11:17:42 E 17 2007-10-19 11:17:49 F 18 2007-10-19 11:17:51 E 19 2007-10-19 11:17:58 H 20 2007-10-19 11:18:05 J 21 2007-10-19 11:21:45 L On Tue, Jan 12, 2010 at 3:22 PM, Andreas Wittmann <andreas_wittm...@gmx.de>wrote: > Dear R-users, > > actually i try to parse some state protocols for my work. i an easy > stetting the code below works fine, if states are reached only once. in > harder settings it could be possible that one state gets visited more times. > in this case for me its interesting to see how much waiting time lies > between to states on the whole. 
> > by the way i didn't use R as a parsing tool so far, so any advice for doing > this more effectivly are quite welcome. > > str01 <- "2007-10-12 11:50:05 state B. ,2007-10-12 11:50:05 state C. > ,2007-10-12 13:23:24 state D. ,2007-10-12 13:23:43 state E. ,2007-10-14 > 15:43:19 state F. ,2007-10-14 15:43:20 state E. ,2007-10-14 15:43:25 state > G. ,2007-10-14 15:43:32 state H. ,2007-10-14 15:43:41 state I. ,2007-10-14 > 15:43:47 state F. ,2007-10-14 15:43:47 state G. ,2007-10-14 15:48:08 state > H. ,2007-10-16 10:10:20 state J. ,2007-10-19 11:12:54 state K ,2007-10-19 > 11:17:37 state D. ,2007-10-19 11:17:42 state E. ,2007-10-19 11:17:49 state > F. ,2007-10-19 11:17:51 state E. ,2007-10-19 11:17:58 state H. ,2007-10-19 > 11:18:05 state J. ,2007-10-19 11:21:45 state L." > > str02 <- unlist(strsplit(str01, "\\,")) > > x1 <- grep("state B", str02) > x2 <- grep("state C", str02) > x3 <- grep("state D", str02) > x4 <- grep("state E", str02) > x5 <- grep("state F", str02) > x6 <- grep("state G", str02) > x7 <- grep("state H", str02) > x8 <- grep("state I", str02) > x9 <- grep("state J", str02) > x10 <- grep("state K", str02) > x11 <- grep("state L", str02) > > t1 <- substr(str02[x1], 1, 19) > t1 <- as.POSIXct(strptime(t1, "%Y-%m-%d %H:%M:%S")) > t2 <- substr(str02[x2], 1, 19) > t2 <- as.POSIXct(strptime(t2, "%Y-%m-%d %H:%M:%S")) > t3 <- substr(str02[x3], 1, 19) > t3 <- as.POSIXct(strptime(t3, "%Y-%m-%d %H:%M:%S")) > t4 <- substr(str02[x4], 1, 19) > t4 <- as.POSIXct(strptime(t4, "%Y-%m-%d %H:%M:%S")) > t5 <- substr(str02[x5], 1, 19) > t5 <- as.POSIXct(strptime(t5, "%Y-%m-%d %H:%M:%S")) > t6 <- substr(str02[x6], 1, 19) > t6 <- as.POSIXct(strptime(t6, "%Y-%m-%d %H:%M:%S")) > t7 <- substr(str02[x7], 1, 19) > t7 <- as.POSIXct(strptime(t7, "%Y-%m-%d %H:%M:%S")) > t8 <- substr(str02[x8], 1, 19) > t8 <- as.POSIXct(strptime(t8, "%Y-%m-%d %H:%M:%S")) > t9 <- substr(str02[x9], 1, 19) > t9 <- as.POSIXct(strptime(t9, "%Y-%m-%d %H:%M:%S")) > t10 <- substr(str02[x10], 1, 19) > t10 <- 
as.POSIXct(strptime(t10, "%Y-%m-%d %H:%M:%S")) > t11 <- substr(str02[x11], 1, 19) > t11 <- as.POSIXct(strptime(t11, "%Y-%m-%d %H:%M:%S")) > > as.numeric(difftime(t11, t1, units="days")) > > ## waiting times between state E and F > sum(as.numeric(difftime(t5, t4, units="days"))) > > > best regards > > Andreas > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html<http://www.r-project.org/posting-guide.html> > and provide commented, minimal, self-contained, reproducible code. > -- Jim Holtman Cincinnati, OH +1 513 646 9390 What is the problem that you are trying to solve? [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.