Data? It's difficult to do anything without some test data.See How to make a great R reproducible example? or http://adv-r.had.co.nz/Reproducibility.html with particular reference to the use of dput() as the best way to provide sample data.
| | | | | | | | | | | How to make a great R reproducible example? When discussing performance with colleagues, teaching, sending a bug report or searching for guidance on mailing... | | | | On Wednesday, May 17, 2017 6:10 PM, Sumanta Basak <sumant...@gmail.com> wrote: Hi All, I've a data-set on product sub-product matrix on which I'm doing multiple calculation, but unfortunately using nested loops, the programme is taking long time to execute. Can anyone help me how to get rid of the following jungle? Any direction would be helpful. GA <- "India" verticle <- "Prod1" prod_data <- readRDS(paste0("/Prod_ladder_",GA,"_",verticle,".rds")) setDF(prod_data) Final_data <- subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm")],!duplicated(prod_data[,c("P_KEY","Active_Prod_Id")])) proximity_prod_mapping <- readRDS("Proximity_prod_mapping.rds") dst_prod <- subset(prod_data[,c("P_KEY")],!duplicated(prod_data$P_KEY)) output_data <- c() data_merge_final <- c() system.time({ for(i in 1 : length(dst_prod)){ prod_data <- subset(prod_data,prod_data$P_KEY == dst_prod[i]) # Subsetting data at prod level dst_prod <- subset(prod_data[,c("Active_Prod_Id")],!duplicated(prod_data$Active_Prod_Id)) # Finding distinct prods of active prodloyee for(j in 1 : length(dst_prod)){ # Subsetting data at prod level for active prod # Fetiching data for Anchor prod prod_data1 <- subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm","Start_Date_1","End_Date_1")],prod_data$Active_Prod_Id == dst_prod[j]) prod_data1$Anchor_prod <- 1 anc_max_End_Date_1 <- as.Date(max(prod_data1$End_Date_1),origin = "1970-01-01") anc_prod_count <- sum(prod_data1$Anchor_prod) # Fetiching data for Proximate prod prox_prod_data <- subset(proximity_prod_mapping[,c("Proximate_prod_ID")],proximity_prod_mapping$Anchor_prod_ID == dst_prod[j]) prod_data2 <- subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm","Start_Date_1","End_Date_1")],prod_data$Active_Prod_Id %in% c(prox_prod_data)) prox_sill_count <- 0 if(nrow(prod_data2) > 0){ prod_data2$Proximity_prod <- 1 prox_max_End_Date_1 <- as.Date(max(prod_data2$End_Date_1),origin = "1970-01-01") prox_sill_count <- sum(prod_data2$Proximity_prod) } # library(plyr) prod_data <-rbind.fill(prod_data1,prod_data2) prod_data$exclude <- 0 prod_data$Anchor_Active_Prod_Id <- dst_prod[j] prod_data$Start_Date_1 <- as.Date(prod_data$Start_Date_1,origin = "1970-01-01") prod_data$End_Date_1 <- as.Date(prod_data$End_Date_1,origin = "1970-01-01") if(prox_sill_count > 0){ if(nrow(prod_data) > 1){ # Trimming end date of proximity prods where end data of proximity prod is greater that Anchor prod if((prox_max_End_Date_1 - anc_max_End_Date_1) > 0){ prod_data$End_Date_1 <- ifelse(prod_data$Proximity_prod == 1 & (prod_data$End_Date_1 - anc_max_End_Date_1) > 0, anc_max_End_Date_1,prod_data$End_Date_1) prod_data$End_Date_1 <- as.Date(prod_data$End_Date_1,origin = "1970-01-01") } prod_data$exclude <- ifelse(prod_data$Proximity_prod == 1 & (as.Date(prod_data$Start_Date_1,origin = "1970-01-01") - anc_max_End_Date_1) > 0,1,0) prod_data <- subset(prod_data,prod_data$exclude == 0) prod_data <- arrange(prod_data,prod_data$Anchor_prod,desc(prod_data$End_Date_1),prod_data$Start_Date_1) prod_data$Anchor_prod <- ifelse(is.na (prod_data$Anchor_prod),0,prod_data$Anchor_prod) prod_data$Proximity_prod <- ifelse(is.na (prod_data$Proximity_prod),0,prod_data$Proximity_prod) prod_data$new_rec <- 0 tot_loop <- nrow(prod_data) k=1 # Looping to map start date and end date of each row with other rows while(k <= tot_loop){ excl_flag <- prod_data[k,c("exclude")] if(excl_flag == 0){ st_dt1 <- as.Date(prod_data[k,c("Start_Date_1")]) end_dt1 <- as.Date(prod_data[k,c("End_Date_1")]) prod_flag1 <- prod_data[k,c("Anchor_prod")] if(k != nrow(prod_data)){ tot_row <- nrow(prod_data) for(m in 1 : (tot_row -k)){ l = k+m if(l != k){ st_dt2 <- as.Date(prod_data[l,c("Start_Date_1")]) end_dt2 <- as.Date(prod_data[l,c("End_Date_1")]) prod_flag2 <- prod_data[l,c("Anchor_prod")] flag_excl <- prod_data[l,c("exclude")] if(flag_excl ==0){ rec_check <- prod_data[l,c("new_rec")] # if(rec_check == 0){ prod_data$Start_date2 <- NA prod_data$End_date2 <- NA new_start_date <- as.Date(ifelse(prod_flag1 == 1 & prod_flag2 == 1,NA, ifelse(prod_flag1 == 1 & prod_flag2 == 0 & end_dt2 > end_dt1 & st_dt2 < end_dt1,end_dt1, ifelse(prod_flag1 == 0 & prod_flag2 == 1 & end_dt1 > end_dt2 & st_dt1 < end_dt2,end_dt2,NA))),origin = "1970-01-01") message(paste0("new_start_date = ",new_start_date)) new_start_date <- as.Date(new_start_date,origin = "1970-01-01") message(paste0("new_start_date = ",new_start_date)) new_end_date <- as.Date(ifelse(prod_flag1 == 1 & prod_flag2 == 1,NA, ifelse(prod_flag1 == 1 & prod_flag2 == 0 & end_dt2 > end_dt1 & st_dt2 < end_dt1,end_dt2, ifelse(prod_flag1 == 0 & prod_flag2 == 1 & end_dt1 > end_dt2 & st_dt1 < end_dt2,end_dt1,NA))),origin = "1970-01-01") message(paste0("new_end_date = ",new_end_date)) new_end_date <- as.Date(new_end_date,origin = "1970-01-01") message(paste0("new_end_date = ",new_end_date)) prod_data[l,c("Start_date2")] <- as.Date(new_start_date,origin = "1970-01-01") prod_data[l,c("End_date2")] <- as.Date(new_end_date,origin = "1970-01-01") tmp_data <- subset(prod_data,!is.na (prod_data$Start_date2)) tmp_data$Start_Date_1 <- as.Date(tmp_data$Start_date2,origin = "1970-01-01") tmp_data$End_Date_1 <- as.Date(tmp_data$End_date2,origin = "1970-01-01") if(nrow(tmp_data)){ tmp_data$new_rec <- 1 prod_data[l,c("End_Date_1")] <- as.Date(end_dt1,origin = "1970-01-01") } prod_data <- rbind(prod_data,tmp_data) tot_row <- tot_row + nrow(tmp_data) tot_loop <- tot_loop + nrow(tmp_data) prod_data$Start_date2 <- NULL prod_data$End_date2 <- NULL # } } # Condition to identify true subset # overlap <- ifelse((st_dt1 >= st_dt2 & st_dt1 <= end_dt2) & (end_dt1 >= st_dt2 & end_dt1 <= end_dt2),1, # ifelse((st_dt2 >= st_dt1 & st_dt2 <= end_dt1) & (end_dt2 >= st_dt1 & end_dt2 <= end_dt1),1,0)) if((end_dt1 - st_dt2) >= 0){ if((end_dt2 - st_dt1) >= 0){ if((st_dt2 - st_dt1) >=0){ prod_data[k,c("exclude")] <- ifelse(prod_flag1 == 1 & prod_flag2 == 1,9999, #if Anchor prods have overlapping ifelse(prod_flag1 == 1 & prod_flag2 == 0,0, ifelse(prod_flag1 == 0 & prod_flag2 == 1,1, ifelse(prod_flag1 == 0 & prod_flag2 == 0,0,1)))) prod_data[l,c("exclude")] <- ifelse(prod_flag1 == 1 & prod_flag2 == 1,9999, ifelse(prod_flag1 == 0 & prod_flag2 == 1,0, ifelse(prod_flag1 == 1 & prod_flag2 == 0,1, ifelse(prod_flag1 == 0 & prod_flag2 == 0,1,0)))) } } } # Condition to trim the dates as to make dates in each observation mutually exclusive to exch other flag_excl <- prod_data[l,c("exclude")] if(flag_excl == 0){ if(end_dt1 > st_dt2){ if(st_dt1 >= st_dt2){ new_date <- ifelse(end_dt2 > st_dt1,as.Date(st_dt1,origin = "1970-01-01"),as.Date(end_dt2,origin = "1970-01-01")) new_date <- as.Date(new_date,origin = "1970-01-01") old_date <- as.Date(prod_data[l,c("End_Date_1")],origin = "1970-01-01") old_date <- as.Date(old_date,origin = "1970-01-01") # prod_data[j,c("End_Date_1")] <- ifelse(prod_flag1 == 1 & prod_flag2 == 1,as.Date(old_date,origin = "1970-01-01"), # ifelse(prod_flag1 == 0 & prod_flag2 == 1,as.date(old_date, origin = "1970-01-01"),as.Date(new_date,origin = "1970-01-01"))) prod_data[l,c("End_Date_1")] <- as.Date(ifelse(prod_flag1 == 1 & prod_flag2 == 1,old_date,ifelse(prod_flag1 == 0 & prod_flag2 == 1,old_date,new_date)),origin = "1970-01-01") } } } } } } } k=k+1 } } } # excluding non required observations prod_data <- subset(prod_data,prod_data$exclude == 0) prod_data$multiply_factor <- ifelse(prod_data$Anchor_prod == 1,1, ifelse(prod_data$Proximity_prod == 1,0.5,9999)) prod_data$recency_in_months <- (as.Date("2017-01-31") - prod_data$End_Date_1)/30 prod_data$recency_factor <- ifelse(prod_data$recency_in_months <= 12,1, ifelse(prod_data$recency_in_months > 12 & prod_data$recency_in_months <= 24,0.9, ifelse(prod_data$recency_in_months > 24 & prod_data$recency_in_months <= 36,0.8, ifelse(prod_data$recency_in_months > 36 & prod_data$recency_in_months <= 48,0.7, ifelse(prod_data$recency_in_months > 48,0.6,9999))))) prod_data$duration_in_months <- (prod_data$End_Date_1 - prod_data$Start_Date_1)/30 prod_data$weight <- prod_data$duration_in_months*prod_data$multiply_factor*prod_data$recency_factor prod <- prod_data[1,c("Anchor_Active_Prod_Id")] if(nrow(prod_data) > 1){ data_merge <-with(prod_data,aggregate(weight ~ P_KEY, FUN = function(x) c(Proficiency_Score = sum(x)))) }else{ data_merge <- prod_data[1,c("P_KEY","weight")] } data_merge$prod <- prod_data[1,c("Anchor_Active_Prod_Id")] data_merge_final <- rbind(data_merge_final,data_merge) # Recency and Duration calculation goes here and final score will be added in final data output_data <- rbind.fill(output_data,prod_data) } } Final_data <- merge(Final_data,data_merge_final,by.x= c("P_KEY", "Active_Prod_Id"),by.y = c("P_KEY", "prod"),all.x=TRUE) names(Final_data)[names(Final_data) == "weight"] <- "Proficiency_Score" emerging_prod_mapping <- readRDS("5.Emerging_prod_Lookup.rds") emerging_prod_list <- subset(emerging_prod_mapping[,c("prod_ID")],!duplicated(emerging_prod_mapping$prod_ID)) Final_data$Emerging_Traditional <- ifelse(Final_data$Active_Prod_Id %in% c(emerging_prod_list),"Emerging","Traditional") Final_data$Final_Proficiency <- ifelse(Final_data$Emerging_Traditional == "Traditional", ifelse(Final_data$Proficiency_Score < 12, "P0", ifelse(Final_data$Proficiency_Score >=12 & Final_data$Proficiency_Score < 24,"P1", ifelse(Final_data$Proficiency_Score >=24 & Final_data$Proficiency_Score < 48,"P2", ifelse(Final_data$Proficiency_Score >=48 & Final_data$Proficiency_Score < 60,"P3", ifelse(Final_data$Proficiency_Score >=60,"P4",NA))))), ifelse(Final_data$Emerging_Traditional == "Emerging", ifelse(Final_data$Proficiency_Score < 6, "P0", ifelse(Final_data$Proficiency_Score >=6 & Final_data$Proficiency_Score < 12,"P1", ifelse(Final_data$Proficiency_Score >=12 & Final_data$Proficiency_Score < 24,"P2", ifelse(Final_data$Proficiency_Score >=24 & Final_data$Proficiency_Score < 30,"P3", ifelse(Final_data$Proficiency_Score >=30,"P4",NA))))),NA)) tst <- prod_data[,c("P_KEY", "Id")] tst <- subset(tst,!duplicated(tst)) Final_data <- merge(Final_data,tst[,c("P_KEY","Id")],by="P_KEY",all.x=TRUE) }) *SUMANTA BASAK* [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.