# Here is one way assuming the data start after QPF FORECAST:

#' Read the numeric table that follows a ":QPF FORECAST" header line.
#'
#' The file is read once. The first line starting with ":QPF FORECAST"
#' supplies the column names; every following line contributes one row for
#' as long as its row key (the text between the first pair of colons after
#' the leading ".E<digits>" tag) is exactly 4 characters long. The table ends
#' at the first line that does not satisfy this, or at the end of the file.
#'
#' @param path Path of the file to read.
#' @param fields List of `c(first, last)` character positions, one entry per
#'   data column. The same positions are used to cut the column names out of
#'   the header line and the values out of each data line.
#' @return A numeric matrix with one row per data line (row names are the
#'   4-character keys) and one column per entry of `fields` (column names are
#'   taken verbatim from the header line, including any padding). Fields that
#'   are not numeric become `NA`; `as.numeric()` may warn about the coercion.
#'   A header followed by no data rows yields a 0-row matrix.
read_qpf_table <- function(path, fields) {
  stopifnot(is.list(fields), length(fields) > 0L)

  # Reading the whole file at once removes the need to manage a connection
  # (nothing to leak on error) and makes end-of-file a normal condition.
  lines <- readLines(path, warn = FALSE)

  header_at <- grep("^:QPF FORECAST", lines)
  if (length(header_at) == 0L) {
    stop("no ':QPF FORECAST' header line found in ", path, call. = FALSE)
  }
  header_at <- header_at[1L] # only the first marker starts the table

  first <- vapply(fields, function(f) as.numeric(f[[1L]]), numeric(1))
  last <- vapply(fields, function(f) as.numeric(f[[2L]]), numeric(1))

  col_names <- substring(lines[header_at], first, last)

  # Everything after the header is a candidate data line.
  body <- lines[-seq_len(header_at)]
  keys <- sub("^.E[0-9]+[^:]*:([^:]+).*", "\\1", body)

  # The table stops at the first line whose key is not 4 characters long;
  # if there is no such line, it runs to the end of the file.
  not_data <- which(nchar(keys) != 4L)
  n_rows <- if (length(not_data) > 0L) not_data[1L] - 1L else length(body)
  rows <- seq_len(n_rows)

  # Preallocate the result and fill it one column at a time (vectorised over
  # rows) instead of growing it with rbind() for every line.
  values <- matrix(
    NA_real_,
    nrow = n_rows,
    ncol = length(fields),
    dimnames = list(keys[rows], col_names)
  )
  for (j in seq_along(fields)) {
    values[, j] <- as.numeric(substring(body[rows], first[j], last[j]))
  }
  values
}

data_dir <- "/temp" # where the data is
# files to read (full paths, so the working directory is left untouched)
files <- file.path(data_dir, c("sample1.htm", "sample2.htm"))
# assumes 4 columns of data
fields <- list(c(19, 24), c(30, 35), c(41, 46), c(52, 57)) # columns of data

results <- lapply(files, read_qpf_table, fields = fields)
# Warning message:
# NAs introduced by coercion

print(results)
# [[1]]
#      7AM 1PM 7PM 1AM
# 0830  NA 5.6 4.4 3.8
# 0831 3.3 3.0 2.6 2.5
# 0901 2.3 2.2 2.2 2.1
# 0902 2.1 2.0 2.0 2.0
# 0903 2.0 1.9 1.9 1.9
# 0904 1.8  NA  NA  NA
#
[[2]]
     7AM 1PM 7PM 1AM
0604  NA  NA 7.0 8.4
0605 9.4 9.2 8.6 7.8
0606 6.8 5.6 4.2 3.5
0607 3.2 3.0 2.9 2.8
0608 2.8 2.8 2.7 2.7
0609 2.7  NA  NA  NA

On Sun, Mar 11, 2012 at 4:07 PM, frauke <fh...@andrew.cmu.edu> wrote:
> Thank you for the quick reply! I have attached two files.
>
> http://r.789695.n4.nabble.com/file/n4464511/sample1.1339z sample1.1339z
> http://r.789695.n4.nabble.com/file/n4464511/sample2.1949z sample2.1949z
>
> --
> View this message in context:
> http://r.789695.n4.nabble.com/extracting-data-from-unstructured-text-file-tp4464423p4464511.html
> Sent from the R help mailing list archive at Nabble.com.
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.

--
Jim Holtman
Data Munger Guru

What is the problem that you are trying to solve?
Tell me what you want to do, not how you want to do it.

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.