I have a data frame with the following structure:

   Operator Score     Date Freq
1         A  Crap Apr 2013    0
2         D  Crap Apr 2013    0
3         J  Crap Apr 2013    0
4         L  Crap Apr 2013    0
5         M  Crap Apr 2013    0
6         A  Good Apr 2013    1
7         D  Good Apr 2013   14
8         J  Good Apr 2013   26
9         L  Good Apr 2013    3
10        M  Good Apr 2013    9

I would like to aggregate this data such that I can find the relative frequency of each score (Good, Ok, Poor and Crap) for each combination of month and operator. For example For operator A in the month Jan 2013 - I would like the following output Operator Score Date Freq Rel.Freq 1 A Crap Jan 2013 0 0 2 A Poor Jan 2013 5 0.22 3 A Good Jan 2013 15 0.65 4 A Ok Jan 2013 3 0.13 i.e I would like to add a relative frequency column to my existing data.frame. I haven't got anywhere near an automated solution. The closest I have is tmp <- subset(df, Operator == "A") tmp$N.norm <- tmp$Freq/sum(ans2$Freq) However this sums all data for operator A regardless of date. So I would need to subset again according to date. Is there a straightforward way to do this in R structure(list(Operator = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("A", "D", "J", "L", "M"), class = "factor"), Score = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), .Label = c("Crap", "Good", "OK", "Poor"), class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L ), .Label = c("Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013", "May 2013"), class = "factor"), Freq = c(0L, 0L, 0L, 0L, 0L, 1L, 14L, 26L, 3L, 9L, 3L, 5L, 3L, 0L, 6L, 3L, 24L, 20L, 29L, 14L, 0L, 0L, 0L, 0L, 0L, 2L, 17L, 24L, 12L, 9L, 2L, 10L, 4L, 0L, 4L, 0L, 27L, 36L, 37L, 13L, 0L, 0L, 0L, 0L, 0L, 15L, 16L, 20L, 5L, 15L, 3L, 6L, 17L, 3L, 5L, 5L, 31L, 12L, 41L, 9L, 0L, 1L, 0L, 0L, 0L, 1L, 8L, 11L, 12L, 17L, 1L, 1L, 3L, 4L, 4L, 5L, 16L, 21L, 25L, 15L, 0L, 0L, 0L, 0L, 0L, 5L, 7L, 18L, 4L, 3L, 0L, 5L, 2L, 0L, 1L, 1L, 15L, 9L, 10L, 9L)), .Names = c("Operator", "Score", "Date", "Freq" ), row.names = c(NA, -100L), class = "data.frame") -- View this message in context: http://r.789695.n4.nabble.com/R-relative-frequency-by-date-and-operator-tp4667498.html Sent from the R help mailing list archive at Nabble.com. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.