Dear Muhammad: You have got to be kidding!
Learn to use R's debugging tools! See the section on Debugging in the R
Language Definition manual and get to work. (A minimal sketch of that
workflow follows the quoted code below.)

-- Bert

On Thu, May 16, 2013 at 7:42 PM, Muhammad Azam <maza...@yahoo.com> wrote:

> Dear friends!
> Hope all of you are fine. I am working on decision trees and have written
> the following program to construct classification trees. It works, but
> sometimes it gives an error message, e.g. "negative extents to matrix".
> I tried to solve the issue but could not. Now I need help: how can I avoid
> this message? Thanks
>
> ####################################
> ds=iris; dl=nrow(ds)  # ds: dataset, dl: data length
> c1=ds[,1]; c2=ds[,2]; c3=ds[,3]; c4=ds[,4]; c5=ds[,5]; id=c(1:dl)
> iris=cbind(c1,c2,c3,c4,c5)
> tec1NF = 0; tec2NF = 0; lmsum = 0; indTrainlist = list(); indTestlist = list()
> options(digits=6)
>
> CTPrundUnprund=function(dataset,samplesize,noofsamples,noofpredictors,response,method,nc,nlvls,nnodes)
> {
>   vlf1=list(); vlf2=array(0,dim=c(noofsamples,nlvls,nnodes)); cl=list()
>   uncl=array(0,dim=c(noofsamples,nlvls,1)); vvlf1=list(); nodeclas=c()
>   tdi=c(); eTraining=c(); nodeside=c(); unclp=array(0,dim=c(nlvls,1))
>   vvlf2=array(0,dim=c(noofsamples,nlvls,nnodes)); nodeclasp=c(); nodesidep=c()
>
>   for (j in 1:noofsamples)
>   {
>     groupindex = 1:nrow(dataset)
>     index = sample(groupindex, samplesize, replace=TRUE)
>     indTrain=unique(sort(index)); indTest=groupindex[-indTrain]
>     m=dataset[indTrain,]; lm <- nrow(m); lmsum <<- lmsum + lm
>     #w=dataset[indTest,]
>     tdi[j]=list(indTrain); indTrainlist[j] = list(indTrain); indTestlist[j] = list(indTest)
>     mcat=table(m[,response])
>     mcat=data.frame(mcat)  # or mcat=list(mcat)
>     mcat=mcat[,1]
>     mcat=matrix(mcat)
>     mcat=as.real(mcat)
>     print(j)
>     d1=0
>     sv=function(dataset)
>     {
>       gnew=c(); ggini=c(); gtwoing=c(); gentropy=c(); val=c(); mGnewLr=c()
>       mGginiLr=c(); mGtwoingLr=c(); mGentropyLr=c(); sPjnew=c(); spLnew=c()
>       spRnew=c(); matrGnew=c(); matrGgini=c(); matrGtwoing=c(); matrGentropy=c()
>       sPjgini=c(); spLgini=c(); spRgini=c(); sAbsDiff=c(); sPj2=c(); spL2=c()
>       spR2=c(); sPj=c(); spL=c(); spR=c(); logPj=c(); logpR=c(); logpL=c()
>       variables=c()
>       m=dataset
>       mcat=table(m[,response])
>       mcat=data.frame(mcat)  # or mcat=list(mcat)
>       mcat=mcat[,1]
>       mcat=matrix(mcat)
>       mcat=as.real(mcat)
>
>       Pj=table(m[,response])/nrow(m)
>       Pj
>       # Left Node
>       for (variable in 1:noofpredictors)
>       {
>         gnew=NULL; ggini=NULL; gtwoing=NULL; gentropy=NULL; val=NULL
>         varSort=sort(unique(dataset[,variable]))
>         #varSort1=sort(unique(m[,variable])); minval <- min(varSort1); maxval <- max(varSort1); r <- maxval-minval; K <- round(1+(3.3*log(nrow(m),10)),digits=0)
>         #; H <- r/K; varSort <- seq(minval, maxval,H)
>         L=length(varSort)
>         for (k in 1 : (L-1))
>         {
>           val[k]=varSort[k]
>           x=m[m[,variable]<=val[k]]  # Obtained set of data with all columns which satisfies the condition
>           lenx=length(x)             # Length of x
>           lenx=lenx/ncol(m)          # xl/no of columns
>           x_left=matrix(x,lenx)      # matrix(x,xr)
>           p1=lenx/nrow(m)
>           y=x_left[,response]
>           y1=table(y)
>           feqy1=matrix(y1)
>           y1=data.frame(y1)
>           y1=y1[,1]
>           y1=matrix(y1)
>           y1=as.real(y1)
>           z=match(mcat,y1)
>           y1=y1[z]
>           y1[is.na(y1)]=0
>           feqy1=feqy1[z]
>           feqy1[is.na(feqy1)]=0
>           cbind(y1, feqy1)
>           pL=feqy1/length(x_left[,response])
>           pL
>
>           # Right Node
>           X=m[m[,variable]>val[k]]   # Obtained set of data with all columns which satisfies the condition
>           lenX=length(X)             # Length of x
>           lenX=lenX/ncol(m)          # xl/no of columns
>           x_right=matrix(X,lenX)     # matrix(x,xr)
>           p2=lenX/nrow(m)
>           Y=x_right[,response]
>           Y1=table(Y)
>           feqy2=matrix(Y1)
>           Y1=data.frame(Y1)
>           Y1=Y1[,1]
>           Y1=matrix(Y1)
>           Y1=as.real(Y1)
>           Z=match(mcat,Y1)
>           Y1=Y1[Z]
>           Y1[is.na(Y1)]=0
>           feqy2=feqy2[Z]
>           feqy2[is.na(feqy2)]=0
>           cbind(Y1, feqy2)
>           pR=feqy2/length(x_right[,response])
>           pR
>
>           for ( i in 1: length(Pj))
>           {
>             sPjnew[i]=Pj[i]*exp(Pj[i]); spRnew[i]=pR[i]*exp(pR[i]); spLnew[i]=pL[i]*exp(pL[i])  # New method
>             sPjgini[i]=(Pj[i]^2); spRgini[i]=(pR[i]^2); spLgini[i]=(pL[i]^2)  # Gini method
>             sAbsDiff[i]=abs(pL[i]-pR[i])  # Twoing method
>             if (Pj[i]!=0)  # Entropy method
>             {
>               logPj[i]=log(Pj[i])
>             }
>             else
>             { logPj[i]= 0
>             }
>             if (pR[i]!=0)
>             {
>               logpR[i]=log(pR[i])
>             }
>             else
>             { logpR[i]= 0
>             }
>             if (pL[i]!=0)
>             {
>               logpL[i]=log(pL[i])
>             }
>             else
>             {
>               logpL[i]= 0
>             }
>             sPj2[i]=(Pj[i]^2); spR2[i]=(pR[i]^2); spL2[i]=(pL[i]^2)
>             sPj[i]=-(Pj[i]*logPj[i]); spL[i]=-(pL[i]*logpL[i]); spR[i]=-(pR[i]*logpR[i])
>           }
>           G1=exp(-1)*(p1*sum(spLnew)+p2*sum(spRnew)-sum(sPjnew))
>           G2=p1*sum(spLgini)+p2*sum(spRgini)-sum(sPjgini)
>           G3=0.25*p1*p2*(sum(sAbsDiff))^2; G4=sum(sPj)-(p1*sum(spL))-(p2*sum(spR))
>
>           gnew[k]=G1; ggini[k]=G2; gtwoing[k]=G3; gentropy[k]=G4
>
>         }
>         gnew; ggini; gtwoing; gentropy; val
>
>         ind1=sort.list(gnew); ind2=sort.list(ggini); ind3=sort.list(gtwoing); ind4=sort.list(gentropy)
>         gnew=gnew[ind1]; ggini=ggini[ind2]; gtwoing=gtwoing[ind3]; gentropy=gentropy[ind4]
>         valGnew=val[ind1]; valGgini=val[ind2]; valGtwoing=val[ind3]; valGentropy=val[ind4]
>
>         cmGnew=cbind(valGnew,gnew); cmGgini=cbind(valGgini,ggini); cmGtwoing=cbind(valGtwoing,gtwoing); cmGentropy=cbind(valGentropy,gentropy)
>         mGnew=matrix(cmGnew,length(gnew)); mGgini=matrix(cmGgini,length(ggini)); mGtwoing=matrix(cmGtwoing,length(gtwoing)); mGentropy=matrix(cmGentropy,length(gentropy))
>
>         mGnewLr[variable]=list(mGnew[length(gnew),])
>         mGginiLr[variable]=list(mGgini[length(ggini),])
>         mGtwoingLr[variable]=list(mGtwoing[length(gtwoing),])
>         mGentropyLr[variable]=list(mGentropy[length(gentropy),])
>       }
>       cbnew=do.call('cbind',mGnewLr); cbgini=do.call('cbind',mGginiLr); cbtwoing=do.call('cbind',mGtwoingLr); cbentropy=do.call('cbind',mGentropyLr)
>
>       for(i in 1:noofpredictors){variables[i]=i}
>
>       cb11=cbnew[1,]; cb12=cbnew[2,]; cb21=cbgini[1,]; cb22=cbgini[2,]
>       cb31=cbtwoing[1,]; cb32=cbtwoing[2,]; cb41=cbentropy[1,]; cb42=cbentropy[2,]
>
>       indexGnew=sort.list(cb12); indexGgini=sort.list(cb22); indexGtwoing=sort.list(cb32); indexGentropy=sort.list(cb42)
>       cb11=cb11[indexGnew]; cb12=cb12[indexGnew]
>       cb21=cb21[indexGgini]; cb22=cb22[indexGgini]
>       cb31=cb31[indexGtwoing]; cb32=cb32[indexGtwoing]
>       cb41=cb41[indexGentropy]; cb42=cb42[indexGentropy]
>       varGnew=variables[indexGnew]; varGgini=variables[indexGgini]; varGtwoing=variables[indexGtwoing]; varGentropy=variables[indexGentropy]
>       cbGnew=cbind(varGnew,cb11,cb12)
>       cbGgini=cbind(varGgini,cb21,cb22)
>       cbGtwoing=cbind(varGtwoing,cb31,cb32)
>       cbGentropy=cbind(varGentropy,cb41,cb42)
>       mGnew=matrix(cbGnew,length(cb12))
>       mGgini=matrix(cbGgini,length(cb22))
>       mGtwoing=matrix(cbGtwoing,length(cb32))
>       mGentropy=matrix(cbGentropy,length(cb42))
>       matrGnew=mGnew[length(cb12),]
>       matrGgini=mGgini[length(cb22),]
>       matrGtwoing=mGtwoing[length(cb32),]
>       matrGentropy=mGentropy[length(cb42),]
>       variableimportance=list(matrGnew,matrGgini,matrGtwoing,matrGentropy)
>       variableimportance
>     }
>     #sv(m)  # Passing parameters
>     LN=function(dataset,variable,value)
>     {
>       x_left=c()
>       m=dataset
>       x=m[m[,variable]<=value]  # Obtained set of data with all columns which satisfies the condition
>       lenx=length(x)            # Length of x
>       lenx=lenx/ncol(m)         # xl/no of columns
>       x_left=matrix(x,lenx)     # matrix(x,xr)
>       return(x_left)
>     }
>     RN=function(dataset,variable,value)
>     {
>       x_right=c()
>       m=dataset
>       X=m[m[,variable]>value]   # Obtained set of data with all columns which satisfies the condition
>       lengX=length(X)           # Length of x
>       lengX=lengX/ncol(m)       # xl/no of columns
>       x_right=matrix(X,lengX)   # Function "SpVal" ends
>       return(x_right)
>     }
>     e=0; n <- 0; vr<-c(); vv<-c(); vl=NULL; vl=array(0,dim=c(nc,nlvls,nnodes)); vvl=NULL; vvl=array(0,dim=c(nc,nlvls,nnodes))
>     vlf=NULL; vlf=list(); vvlf=NULL; vvlf=list(); trec1N = 0
>     func=function(data,level)
>     {
>       var1=c()
>       f=table(data[,response]); wf=as.real(names(which.max(f))); trecN <- nrow(data)-max(table(data[,response]))
>       for(q in 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0
>       if (any(var1==0) || nrow(data)<=5)
>       {
>         n <<- n + 1
>         if (level > 0){ for (a in 1 : level)
>         {
>           vl[wf,a,n] <<- vr[a]; vvl[wf,a,n] <<- vv[a]  # vl=variable level, vvl=variable value level
>         }}
>         trec1N <<- trec1N + trecN
>
>         return()
>       }else
>       {
>         data1 = data; s=sv(data1)[[method]]; s1 <- s[1]; s2=s[2]; level <- level + 1
>
>         vr[level] <<- s1; vv[level] <<- s2  # vv=variable value
>         data=LN(data1,s1,s2)
>         func(data,level)
>         data=RN(data1,s1,s2)
>         func(data,level)
>       }
>     }
>     func(m,0)  # Passing parameters
>     eTraining[j] <- trec1N
>     cl[[j]]=apply(vl,3,function(vl) which(rowSums(vl)!=0))[1:n]; for(i in 1:n){uncl[j,i,1]=unlist(cl[[j]][i])}
>     for(i in 1:nnodes){for(ii in 1:nc){if(any(vl[,,i][ii,]!=0)){vlf[[i]] <- vl[,,i][ii,]; vvlf[[i]] <- vvl[,,i][ii,]}}}
>     vlf1[[j]] <- do.call('rbind',vlf); vvlf1[[j]] <- do.call('rbind',vvlf)
>   }
>   junk <- sapply(vlf1,function(i) !is.null(i))
>   l=vlf1[junk]  # l for variable and ll for value selection
>   x <- sapply(1:length(l), function(x) {
>     sum(sapply(l, function(y) {
>       if ( nrow(l[[x]]) != nrow(y) | ncol(l[[x]]) != ncol(y) ) FALSE
>       else sum(y != l[[x]]) == 0
>     }))
>   } ); varseq.max <- max(x)
>   wxmax <- which(x==max(x)); lwxmax_var <- length(wxmax)
>   ll=vvlf1[junk]; llfresh <- ll[wxmax]; lfresh <- l[wxmax]
>   # l for value and ll for variable selection
>   x <- sapply(1:length(llfresh), function(x) {
>     sum(sapply(llfresh, function(y) {
>       if ( nrow(llfresh[[x]]) != nrow(y) | ncol(llfresh[[x]]) != ncol(y) ) FALSE
>       else sum(y != llfresh[[x]]) == 0
>     }))
>   } ); valseq.max <- max(x)
>   wxmax1 <- which(x==max(x)); lwxmax_val <- length(wxmax1); bestsamples <- wxmax[wxmax1]; eTrainbest <- eTraining[bestsamples]
>   weTrainmin <- which(eTrainbest == min(eTrainbest)); stepwisemax <- cbind(varseq.max,valseq.max); lweTrain_min <- length(weTrainmin)
>   lastweTrainmin <- weTrainmin[lweTrain_min]; bestsamplesf <- bestsamples[weTrainmin]; lngth <- length(bestsamplesf); finalsample <- bestsamplesf[lngth]
>   mm <- dataset[tdi[[finalsample]],]; l1 <- l[[finalsample]]; indxl1 <- colSums(l1)!=0; var <- matrix(l1[,indxl1],nrow=nrow(l1))
>   allmaxfreq <- cbind(lwxmax_var,lwxmax_val,lweTrain_min); ll2 <- ll[[finalsample]]; indxll2 <- colSums(ll2)!=0
>   val <- round(matrix(ll2[,indxll2],nrow=nrow(ll2)),digits=4)
>   mat <- as.data.frame(matrix(paste(var,"(",val,")",sep=""),nrow=nrow(var)))
>   print("Sample number having max freq"); print(wxmax)
>   print("Samples index having max value-wise freq from above samples"); print(wxmax1)
>   print("length of max and same frequencies"); print(allmaxfreq)
>   print("step-wise most repeated variable-value sequence"); print(stepwisemax)
>   print("Sample index with min error"); print(weTrainmin)
sample"); print(finalsample); > print("Each row of matrix is terminal node, L for left and R for right > node") > indTrainfinal = indTrainlist[[finalsample]]; indTestfinal = > indTestlist[[finalsample]] > mm = dataset[indTrainfinal,]; ww = dataset[indTestfinal,] > #print(indTrainfinal); print(indTestfinal) > e=0; n <- 0; avelength <- lmsum/noofsamples; units <- c(); misc.units <- > c(); deviance <- c() > func=function(data,testdata,level) > { > var1=c(); #print(cbind(ncol(testdata),nrow(testdata))) > y=table(testdata[,response]); pp=y/nrow(testdata) > f=matrix(y); > pp=matrix(pp) > Y=as.real(names(y)) > z=match(mcat,Y); Y=Y[z]; Y[is.na(Y)]=0 > f=f[z]; f[is.na(f)]=0; sumf <- sum(f) > pp=pp[z]; pp[is.na(pp)]=0 > devN=f*log(pp); devN[is.na(devN)]=0; dTest=(-2)*sum(matrix(devN)); > fr=table(data[,response]); wf=as.real(names(which.max(fr))); > tecN <- nrow(testdata)-max(table(testdata[,response])); for(q in > 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0 > if (any(var1==0) || nrow(data)<=5) > { > n <<- n + 1; nodeclas[n] <<- wf; nodeside[n] <<- side > tec1NF <<- tec1NF + tecN; units[n] <<- sumf; misc.units[n] <<- tecN; > deviance[n] <<- dTest > > return() > }else > { > data1 = data; data2 = testdata; s=sv(data1)[[method]]; s1 <- s[1]; > s2=s[2]; level <- level + > 1 > > data=LN(data1,s1,s2); testdata=LN(data2,s1,s2); side <<- c("L") > func(data,testdata,level) > data=RN(data1,s1,s2); testdata=RN(data2,s1,s2); side <<- c("R") > func(data,testdata,level) > } > } > func(mm,ww,0) > deviance <- round(deviance,digits=2); LorRnode <- nodeside; class <- > nodeclas > resMat <- cbind(mat,LorRnode,units,misc.units,deviance,class); > print("un-pruned tree"); print(resMat) > eTrain <- round(mean(eTraining)/avelength*100,digits=2); eTest <- > round(tec1NF/nrow(ww)*100,digits=2); dev.sum <- sum(deviance); > print(cbind(eTrain,eTest,dev.sum)) > n <- 0; vr1<-c(); vv1<-c(); vlp=array(0,dim=c(nc,nlvls,nnodes)); > vvlp=array(0,dim=c(nc,nlvls,nnodes)); avelength <- lmsum/noofsamples; unit > <- c(); > misc.unit <- c(); deviancep <- c(); wf1 <- 0.1; wf2 <- 0.2; vlfp=list(); > vvlfp=list(); #esum <- c("esum") > funcp=function(data,testdata,levelp) > { > var1=c(); var1l=c(); var1r=c() > y=table(testdata[,response]); pp=y/nrow(testdata) > f=matrix(y); pp=matrix(pp) > Y=as.real(names(y)) > z=match(mcat,Y); Y=Y[z]; Y[is.na(Y)]=0 > f=f[z]; f[is.na(f)]=0; sumf <- sum(f) > pp=pp[z]; pp[is.na(pp)]=0 > devN=f*log(pp); devN[is.na(devN)]=0; dTest=(-2)*sum(matrix(devN)); > fr=table(data[,response]); wf=as.real(names(which.max(fr))); > tecN <- nrow(testdata)-max(table(testdata[,response])); e <- > nrow(data)-max(table(data[,response])) > for(q in 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0 > if(nrow(data) > 5 && all(var1 != > 0)){ss=sv(data)[[method]]; ss1=ss[1]; ss2=ss[2]; dat=LN(data,ss1,ss2); > n1=nrow(dat); > for(q1 in 1:response){var1l[q1]=var(dat[,q1])}; var1l[is.na(var1l)]=0; e1 > <- nrow(dat)-max(table(dat[,response])); dat1=RN(data,ss1,ss2); > n2=nrow(dat1); > for(q2 in 1:response){var1r[q2]=var(dat1[,q2])}; var1r[is.na(var1r)]=0; > e2 <- nrow(dat1)-max(table(dat1[,response])); f1=table(dat[,response]); > f2=table(dat1[,response]); wf1=as.real(names(which.max(f1))); > wf2=as.real(names(which.max(f2)))}; #esum <- sum(e1,e2) > if (any(var1==0) || wf1==wf2 || nrow(data)<=5) # || esum==e > { > n <<- n + 1; nodeclasp[n] <<- wf; nodesidep[n] <<- side > if (levelp > 0){ for (b in 1 : levelp) > { > vlp[wf,b,n] <<- vr1[b]; vvlp[wf,b,n] <<- vv1[b] # vl=variable level, > vvl=variable value > level > }} > 
>       tec2NF <<- tec2NF + tecN; unit[n] <<- sumf; misc.unit[n] <<- tecN; deviancep[n] <<- dTest
>
>       return()
>     }else
>     {
>       data1 = data; data2 = testdata; s=sv(data1)[[method]]; s1 <- s[1]; s2=s[2]; levelp <- levelp + 1
>
>       vr1[levelp] <<- s1; vv1[levelp] <<- s2  # vv=variable value
>
>       data=LN(data1,s1,s2); testdata=LN(data2,s1,s2); side <<- c("L")
>       funcp(data,testdata,levelp)
>       data=RN(data1,s1,s2); testdata=RN(data2,s1,s2); side <<- c("R")
>       funcp(data,testdata,levelp)
>     }
>   }
>   funcp(mm,ww,0)
>   clp=apply(vlp,3,function(vlp) which(rowSums(vlp)!=0))[1:n]
>   for(i in 1:nnodes){for(ii in 1:nc){if(any(vlp[,,i][ii,]!=0)){vlfp[[i]] <- vlp[,,i][ii,]; vvlfp[[i]] <- vvlp[,,i][ii,]}}}
>   vlf1p <- do.call('rbind',vlfp); vvlf1p <- do.call('rbind',vvlfp)
>   lp=vlf1p; indlp <- colSums(lp) != 0; lp=matrix(lp[,indlp],nrow=nrow(lp))
>   llp=vvlf1p; indllp <- colSums(llp) != 0
>   llp=round(matrix(llp[,indllp],nrow=nrow(llp)),digits=4)
>   print("pruned tree"); matp <- as.data.frame(matrix(paste(lp,"(",llp,")",sep=""),nrow=nrow(lp)))
>   deviance <- round(deviancep,digits=2); LorRnode <- nodesidep; class <- nodeclasp
>   resMatp <- cbind(matp,LorRnode,unit,misc.unit,deviance,class)
>   print(resMatp)
>   eTrain <- round(mean(eTraining)/avelength*100,digits=2); eTest <- round(tec2NF/nrow(ww)*100,digits=2); dev.sum <- sum(deviance)
>   print(cbind(eTrain,eTest,dev.sum))
> }
> CTPrundUnprund(iris,150,1,4,5,1,3,10,10)
>
>
> best regards
>
> Muhammad Azam
>

--
Bert Gunter
Genentech Nonclinical Biostatistics
Internal Contact Info:
Phone: 467-7374
Website:
http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
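
For anyone following Bert's pointer, here is a minimal sketch of that base-R
debugging workflow. It assumes the quoted script has already been pasted or
sourced into the current session (so that CTPrundUnprund() exists); the call
and its arguments are simply the ones from the post, and the seed value is an
arbitrary illustration.

## A sketch only: locate where "negative extents to matrix" is raised.
set.seed(1)   # any seed that reproduces the failure; makes sample() repeatable

## 1. Reproduce the error, then print the call stack that produced it.
CTPrundUnprund(iris, 150, 1, 4, 5, 1, 3, 10, 10)
traceback()

## 2. Re-run with recover() as the error handler: R drops into an
##    interactive browser at the failing frame, where objects such as
##    lenx, lenX or mcat can be inspected at the point of the error.
options(error = recover)
CTPrundUnprund(iris, 150, 1, 4, 5, 1, 3, 10, 10)
options(error = NULL)   # restore the default error handler

## 3. Or step through the top-level function one expression at a time;
##    sv(), LN() and RN() are defined inside it, so they are only
##    reachable from the browser once execution is inside the body.
debugonce(CTPrundUnprund)
CTPrundUnprund(iris, 150, 1, 4, 5, 1, 3, 10, 10)

Because the helpers are created inside CTPrundUnprund(), option 2 is usually
the quickest way to inspect their local variables at the moment of failure.
Note also that as.real() is defunct as of R 3.0.0 (as.numeric() is the drop-in
replacement), so the quoted script will not run unmodified on a current R.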