Dear Muhammed:

You have got to be kidding!

Learn to use R's debugging tools! See the section in the R Language
Definition Manual on Debugging and get to work.

-- Bert



On Thu, May 16, 2013 at 7:42 PM, Muhammad Azam <maza...@yahoo.com> wrote:

> Dear friends!
> Hope all of you are fine. I am doing work on decision trees. I
> have made the following program to construct classification trees. It works,
> but sometimes it gives an error message, e.g. "negative extents to matrix". I
> tried to solve the issue but could not. Now I need help: how can I avoid this
> message? Thanks
>
> ####################################
> ds=iris; dl=nrow(ds)   # ds: dataset, dl: data length
> c1=ds[,1]; c2=ds[,2]; c3=ds[,3]; c4=ds[,4]; c5=ds[,5]; id=c(1:dl);
> iris=cbind(c1,c2,c3,c4,c5)
> tec1NF = 0; tec2NF = 0; lmsum = 0; indTrainlist = list(); indTestlist =
> list()
> options(digits=6)
>
>  
> CTPrundUnprund=function(dataset,samplesize,noofsamples,noofpredictors,response,method,nc,nlvls,nnodes)
>  {
> vlf1=list(); vlf2=array(0,dim=c(noofsamples,nlvls,nnodes)); cl=list();
> uncl=array(0,dim=c(noofsamples,nlvls,1)); vvlf1=list(); nodeclas=c();
> tdi=c(); eTraining=c(); nodeside=c(); unclp=array(0,dim=c(nlvls,1));
> vvlf2=array(0,dim=c(noofsamples,nlvls,nnodes)); nodeclasp=c(); nodesidep=c()
>
>    for (j in 1:noofsamples)
>    {
>      groupindex = 1:nrow(dataset)
>      index = sample(groupindex,samplesize, replace=TRUE)
>      indTrain=unique(sort(index)); indTest=groupindex[-indTrain];
> m=dataset[indTrain,]; lm <- nrow(m); lmsum <<- lmsum + lm;
> #w=dataset[indTest,];
>      tdi[j]=list(indTrain); indTrainlist[j] = list(indTrain);
> indTestlist[j] =
>  list(indTest)
>  mcat=table(m[,response])
>  mcat=data.frame(mcat)     # or  mcat=list(mcat)
>  mcat=mcat[,1]
>  mcat=matrix(mcat)
>  mcat=as.real(mcat)
>  print(j)
>   d1=0
>   sv=function(dataset)
>   {
>  gnew=c(); ggini=c(); gtwoing=c(); gentropy=c(); val=c(); mGnewLr=c();
> mGginiLr=c(); mGtwoingLr=c(); mGentropyLr=c(); sPjnew=c(); spLnew=c();
> spRnew=c(); matrGnew=c(); matrGgini=c(); matrGtwoing=c(); matrGentropy=c();
> sPjgini=c(); spLgini=c(); spRgini=c(); sAbsDiff=c(); sPj2=c(); spL2=c();
> spR2=c(); sPj=c(); spL=c(); spR=c(); logPj=c(); logpR=c(); logpL=c();
> variables=c()
>  m=dataset
>  mcat=table(m[,response])
>  mcat=data.frame(mcat)     # or
>  mcat=list(mcat)
>  mcat=mcat[,1]
>  mcat=matrix(mcat)
>  mcat=as.real(mcat)
>
>  Pj=table(m[,response])/nrow(m)
>  Pj
>  # Left Node
>    for (variable in 1:noofpredictors)
>    {
>      gnew=NULL; ggini=NULL; gtwoing=NULL; gentropy=NULL; val=NULL
>  varSort=sort(unique(dataset[,variable]))
> #varSort1=sort(unique(m[,variable])); minval <- min(varSort1); maxval <-
> max(varSort1); r <- maxval-minval; K <-
> round(1+(3.3*log(nrow(m),10)),digits=0)
> #; H <- r/K; varSort <- seq(minval, maxval,H)
>  L=length(varSort)
>  for (k in 1 : (L-1))
>  {
>   val[k]=varSort[k]
>   x=m[m[,variable]<=val[k]]   # Obtained set of data with all columns
> which satisfies the condition
>   lenx=length(x)    # Length of
>  x
>   lenx=lenx/ncol(m)    # xl/no of columns
>   x_left=matrix(x,lenx)   # matrix(x,xr)
>   p1=lenx/nrow(m)
>   y=x_left[,response]
>   y1=table(y)
>   feqy1=matrix(y1)
>   y1=data.frame(y1)
>   y1=y1[,1]
>   y1=matrix(y1)
>   y1=as.real(y1)
>   z=match(mcat,y1)
>   y1=y1[z]
>   y1[is.na(y1)]=0
>   feqy1=feqy1[z]
>   feqy1[is.na(feqy1)]=0
>   cbind(y1, feqy1)
>   pL=feqy1/length(x_left[,response])
>   pL
>
>   # Right Node
>   X=m[m[,variable]>val[k]]  # Obtained set of data with all columns which
> satisfies the condition
>   lenX=length(X)    # Length of x
>   lenX=lenX/ncol(m)   # xl/no of
>  columns
>   x_right=matrix(X,lenX)    # matrix(x,xr)
>   p2=lenX/nrow(m)
>   Y=x_right[,response]
>   Y1=table(Y)
>   feqy2=matrix(Y1)
>   Y1=data.frame(Y1)
>   Y1=Y1[,1]
>   Y1=matrix(Y1)
>   Y1=as.real(Y1)
>   Z=match(mcat,Y1)
>   Y1=Y1[Z]
>   Y1[is.na(Y1)]=0
>   feqy2=feqy2[Z]
>   feqy2[is.na(feqy2)]=0
>   cbind(Y1, feqy2)
>   pR=feqy2/length(x_right[,response])
>   pR
>
>   for ( i in 1: length(Pj))
>   {
>    sPjnew[i]=Pj[i]*exp(Pj[i]); spRnew[i]=pR[i]*exp(pR[i]);
> spLnew[i]=pL[i]*exp(pL[i]) # New method
>    sPjgini[i]=(Pj[i]^2); spRgini[i]=(pR[i]^2); spLgini[i]=(pL[i]^2)   #
> Gini method
>    sAbsDiff[i]=abs(pL[i]-pR[i])        # Twoing method
>    if (Pj[i]!=0)          # Entropy method
>    {
>     logPj[i]=log(Pj[i])
>    }
>     else
>     { logPj[i]= 0
>     }
>      if (pR[i]!=0)
>      {
>       logpR[i]=log(pR[i])
>      }
>      else
>      { logpR[i]= 0
>      }
>     if
>  (pL[i]!=0)
>     {
>      logpL[i]=log(pL[i])
>     }
>     else
>    {
>     logpL[i]= 0
>    }
>   sPj2[i]=(Pj[i]^2); spR2[i]=(pR[i]^2); spL2[i]=(pL[i]^2);
> sPj[i]=-(Pj[i]*logPj[i]); spL[i]=-(pL[i]*logpL[i]); spR[i]=-(pR[i]*logpR[i])
>   }
>   G1=exp(-1)*(p1*sum(spLnew)+p2*sum(spRnew)-sum(sPjnew));
> G2=p1*sum(spLgini)+p2*sum(spRgini)-sum(sPjgini)
>   G3=0.25*p1*p2*(sum(sAbsDiff))^2; G4=sum(sPj)-(p1*sum(spL))-(p2*sum(spR))
>
>   gnew[k]=G1; ggini[k]=G2; gtwoing[k]=G3; gentropy[k]=G4
>
>  }
>  gnew; ggini; gtwoing; gentropy; val
>
>  ind1=sort.list(gnew); ind2=sort.list(ggini); ind3=sort.list(gtwoing);
>  ind4=sort.list(gentropy)
>  gnew=gnew[ind1]; ggini=ggini[ind2]; gtwoing=gtwoing[ind3];
> gentropy=gentropy[ind4]
>  valGnew=val[ind1]; valGgini=val[ind2]; valGtwoing=val[ind3];
> valGentropy=val[ind4]
>
>  cmGnew=cbind(valGnew,gnew); cmGgini=cbind(valGgini,ggini);
> cmGtwoing=cbind(valGtwoing,gtwoing); cmGentropy=cbind(valGentropy,gentropy)
>  mGnew=matrix(cmGnew,length(gnew)); mGgini=matrix(cmGgini,length(ggini));
> mGtwoing=matrix(cmGtwoing,length(gtwoing));
>       mGentropy=matrix(cmGentropy,length(gentropy))
>
>  mGnewLr[variable]=list(mGnew[length(gnew),]);
> mGginiLr[variable]=list(mGgini[length(ggini),]);
>          mGtwoingLr[variable]=list(mGtwoing[length(gtwoing),]);
> mGentropyLr[variable]=list(mGentropy[length(gentropy),])
>    }
>  cbnew=do.call('cbind',mGnewLr);
>  cbgini=do.call('cbind',mGginiLr); cbtwoing=do.call('cbind',mGtwoingLr);
> cbentropy=do.call('cbind',mGentropyLr)
>
>  for(i in 1:noofpredictors){variables[i]=i}
>
>  cb11=cbnew[1,]; cb12=cbnew[2,]; cb21=cbgini[1,]; cb22=cbgini[2,];
> cb31=cbtwoing[1,]; cb32=cbtwoing[2,]; cb41=cbentropy[1,]; cb42=cbentropy[2,]
>
>  indexGnew=sort.list(cb12); indexGgini=sort.list(cb22);
> indexGtwoing=sort.list(cb32); indexGentropy=sort.list(cb42)
>  cb11=cb11[indexGnew]; cb12=cb12[indexGnew]
>  cb21=cb21[indexGgini]; cb22=cb22[indexGgini]
>  cb31=cb31[indexGtwoing]; cb32=cb32[indexGtwoing]
>  cb41=cb41[indexGentropy]; cb42=cb42[indexGentropy]
>  varGnew=variables[indexGnew]; varGgini=variables[indexGgini];
> varGtwoing=variables[indexGtwoing]; varGentropy=variables[indexGentropy]
>  cbGnew=cbind(varGnew,cb11,cb12)
>  cbGgini=cbind(varGgini,cb21,cb22)
>  cbGtwoing=cbind(varGtwoing,cb31,cb32)
>  cbGentropy=cbind(varGentropy,cb41,cb42)
>  mGnew=matrix(cbGnew,length(cb12))
>  mGgini=matrix(cbGgini,length(cb22))
>  mGtwoing=matrix(cbGtwoing,length(cb32))
>  mGentropy=matrix(cbGentropy,length(cb42))
>  matrGnew=mGnew[length(cb12),]
>  matrGgini=mGgini[length(cb22),]
>  matrGtwoing=mGtwoing[length(cb32),]
>  matrGentropy=mGentropy[length(cb42),]
>  variableimportance=list(matrGnew,matrGgini,matrGtwoing,matrGentropy)
>  variableimportance
> }
>  #sv(m)      # Passing parameters
>    LN=function(dataset,variable,value)
>    {
>
>  x_left=c()
>  m=dataset
>  x=m[m[,variable]<=value]     # Obtained set of data with all columns
> which satisfies the condition
>  lenx=length(x)     # Length of x
>  lenx=lenx/ncol(m)    # xl/no of columns
>  x_left=matrix(x,lenx)    # matrix(x,xr)
>  return(x_left)
>    }
>    RN=function(dataset,variable,value)
>    {
>  x_right=c()
>  m=dataset
>  X=m[m[,variable]>value]    # Obtained set of data with all columns which
> satisfies the condition
>  lengX=length(X)     # Length of x
>  lengX=lengX/ncol(m)    # xl/no of columns
>  x_right=matrix(X,lengX)    # Function "SpVal" ends
>  return(x_right)
>    }
> e=0; n <- 0; vr<-c(); vv<-c(); vl=NULL;
> vl=array(0,dim=c(nc,nlvls,nnodes)); vvl=NULL;
> vvl=array(0,dim=c(nc,nlvls,nnodes)); vlf=NULL; vlf=list(); vvlf=NULL;
> vvlf=list(); trec1N = 0
>     func=function(data,level)
>     {
>  var1=c()
>  f=table(data[,response]); wf=as.real(names(which.max(f))); trecN <-
> nrow(data)-max(table(data[,response]));
>  for(q in 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0;
>  if (any(var1==0) || nrow(data)<=5)
>  {
>                 n <<- n + 1
>   if (level > 0){ for (a in 1 : level)
>   {
>   vl[wf,a,n] <<- vr[a]; vvl[wf,a,n] <<- vv[a]  # vl=variable level,
> vvl=variable value level
>   }}
>   trec1N <<- trec1N + trecN
>
>    return()
>  }else
>   {
>    data1 = data; s=sv(data1)[[method]]; s1 <- s[1]; s2=s[2]; level <-
> level + 1
>
>    vr[level] <<- s1; vv[level] <<- s2 # vv=variable value
>    data=LN(data1,s1,s2)
>     func(data,level)
>    data=RN(data1,s1,s2)
>    func(data,level)
>   }
>     }
>   func(m,0)   # Passing parameters
> eTraining[j] <- trec1N
> cl[[j]]=apply(vl,3,function(vl) which(rowSums(vl)!=0))[1:n]; for(i in
> 1:n){uncl[j,i,1]=unlist(cl[[j]][i])}
> for(i in 1:nnodes){for(ii in 1:nc){if(any(vl[,,i][ii,]!=0)){vlf[[i]] <-
> vl[,,i][ii,]; vvlf[[i]] <- vvl[,,i][ii,]}}}; vlf1[[j]] <-
> do.call('rbind',vlf); vvlf1[[j]] <-
>  do.call('rbind',vvlf)
>    }; junk <- sapply(vlf1,function(i) !is.null(i))
> l=vlf1[junk]  # l for variable and ll for value selection
> x <- sapply(1:length(l), function(x) {
>   sum(sapply(l, function(y) {
>     if ( nrow(l[[x]]) != nrow(y) | ncol(l[[x]]) != ncol(y) ) FALSE
>     else sum(y != l[[x]]) == 0
>   }))
> } ); varseq.max <- max(x)
> wxmax <- which(x==max(x)); lwxmax_var <- length(wxmax);
> ll=vvlf1[junk]; llfresh <- ll[wxmax]; lfresh <- l[wxmax]
> # l for value and ll for variable selection
> x <- sapply(1:length(llfresh), function(x) {
>   sum(sapply(llfresh, function(y) {
>     if ( nrow(llfresh[[x]]) != nrow(y) | ncol(llfresh[[x]]) != ncol(y) )
> FALSE
>     else sum(y != llfresh[[x]]) == 0
>   }))
> } ); valseq.max <- max(x)
> wxmax1 <- which(x==max(x)); lwxmax_val <- length(wxmax1); bestsamples <-
> wxmax[wxmax1]; eTrainbest <- eTraining[bestsamples];
> weTrainmin <- which(eTrainbest == min(eTrainbest)); stepwisemax <-
> cbind(varseq.max,valseq.max); lweTrain_min <- length(weTrainmin);
> lastweTrainmin <- weTrainmin[lweTrain_min]; bestsamplesf <-
> bestsamples[weTrainmin]; lngth <- length(bestsamplesf); finalsample <-
> bestsamplesf[lngth];
> mm <- dataset[tdi[[finalsample]],]; l1 <- l[[finalsample]]; indxl1 <-
> colSums(l1)!=0; var <- matrix(l1[,indxl1],nrow=nrow(l1));
> allmaxfreq <- cbind(lwxmax_var,lwxmax_val,lweTrain_min); ll2 <-
> ll[[finalsample]]; indxll2 <- colSums(ll2)!=0;
> val <- round(matrix(ll2[,indxll2],nrow=nrow(ll2)),digits=4);
> mat <- as.data.frame(matrix(paste(var,"(",val,")",sep=""),nrow=nrow(var)));
> print("Sample number having max freq"); print(wxmax); print("Samples index
> having max value-wise freq from above samples"); print(wxmax1);
> print("length of max and same frequencies"); print(allmaxfreq);
> print("step-wise most repeated variable-value sequence");
> print(stepwisemax);
> print("Sample index with min error"); print(weTrainmin); print("final
> sample"); print(finalsample);
> print("Each row of matrix is terminal node, L for left and R for right
> node")
> indTrainfinal = indTrainlist[[finalsample]]; indTestfinal =
> indTestlist[[finalsample]]
> mm = dataset[indTrainfinal,]; ww = dataset[indTestfinal,]
> #print(indTrainfinal); print(indTestfinal)
> e=0; n <- 0; avelength <- lmsum/noofsamples; units <- c(); misc.units <-
> c(); deviance <- c()
>     func=function(data,testdata,level)
>     {
>  var1=c(); #print(cbind(ncol(testdata),nrow(testdata)))
>  y=table(testdata[,response]); pp=y/nrow(testdata)
>  f=matrix(y);
>  pp=matrix(pp)
>  Y=as.real(names(y))
>  z=match(mcat,Y); Y=Y[z]; Y[is.na(Y)]=0
>  f=f[z]; f[is.na(f)]=0; sumf <- sum(f)
>  pp=pp[z]; pp[is.na(pp)]=0
>  devN=f*log(pp); devN[is.na(devN)]=0; dTest=(-2)*sum(matrix(devN));
> fr=table(data[,response]); wf=as.real(names(which.max(fr)));
>  tecN <- nrow(testdata)-max(table(testdata[,response])); for(q in
> 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0
>  if (any(var1==0) || nrow(data)<=5)
>  {
>   n <<- n + 1; nodeclas[n] <<- wf; nodeside[n] <<- side
>   tec1NF <<- tec1NF + tecN; units[n] <<- sumf; misc.units[n] <<- tecN;
> deviance[n] <<- dTest
>
>    return()
>  }else
>   {
>    data1 = data; data2 = testdata; s=sv(data1)[[method]]; s1 <- s[1];
> s2=s[2]; level <- level +
>  1
>
>    data=LN(data1,s1,s2); testdata=LN(data2,s1,s2); side <<- c("L")
>     func(data,testdata,level)
>    data=RN(data1,s1,s2); testdata=RN(data2,s1,s2); side <<- c("R")
>    func(data,testdata,level)
>   }
>     }
>   func(mm,ww,0)
> deviance <- round(deviance,digits=2); LorRnode <- nodeside; class <-
> nodeclas
> resMat <- cbind(mat,LorRnode,units,misc.units,deviance,class);
> print("un-pruned tree"); print(resMat)
> eTrain <- round(mean(eTraining)/avelength*100,digits=2); eTest <-
> round(tec1NF/nrow(ww)*100,digits=2); dev.sum <- sum(deviance);
> print(cbind(eTrain,eTest,dev.sum))
> n <- 0; vr1<-c(); vv1<-c(); vlp=array(0,dim=c(nc,nlvls,nnodes));
> vvlp=array(0,dim=c(nc,nlvls,nnodes)); avelength <- lmsum/noofsamples; unit
> <- c();
> misc.unit <- c(); deviancep <- c(); wf1 <- 0.1; wf2 <- 0.2; vlfp=list();
> vvlfp=list(); #esum <- c("esum")
>     funcp=function(data,testdata,levelp)
>     {
>  var1=c(); var1l=c(); var1r=c()
>  y=table(testdata[,response]); pp=y/nrow(testdata)
>  f=matrix(y); pp=matrix(pp)
>  Y=as.real(names(y))
>  z=match(mcat,Y); Y=Y[z]; Y[is.na(Y)]=0
>  f=f[z]; f[is.na(f)]=0; sumf <- sum(f)
>  pp=pp[z]; pp[is.na(pp)]=0
>  devN=f*log(pp); devN[is.na(devN)]=0; dTest=(-2)*sum(matrix(devN));
> fr=table(data[,response]); wf=as.real(names(which.max(fr)));
>  tecN <- nrow(testdata)-max(table(testdata[,response])); e <-
> nrow(data)-max(table(data[,response]))
>  for(q in 1:response){var1[q]=var(data[,q])}; var1[is.na(var1)]=0
>  if(nrow(data) > 5 && all(var1 !=
>  0)){ss=sv(data)[[method]]; ss1=ss[1]; ss2=ss[2]; dat=LN(data,ss1,ss2);
> n1=nrow(dat);
> for(q1 in 1:response){var1l[q1]=var(dat[,q1])}; var1l[is.na(var1l)]=0; e1
> <- nrow(dat)-max(table(dat[,response])); dat1=RN(data,ss1,ss2);
> n2=nrow(dat1);
> for(q2 in 1:response){var1r[q2]=var(dat1[,q2])}; var1r[is.na(var1r)]=0;
> e2 <- nrow(dat1)-max(table(dat1[,response])); f1=table(dat[,response]);
> f2=table(dat1[,response]); wf1=as.real(names(which.max(f1)));
> wf2=as.real(names(which.max(f2)))}; #esum <- sum(e1,e2)
>  if (any(var1==0) || wf1==wf2 || nrow(data)<=5)   #  || esum==e
>  {
>   n <<- n + 1; nodeclasp[n] <<- wf; nodesidep[n] <<- side
>   if (levelp > 0){ for (b in 1 : levelp)
>   {
>   vlp[wf,b,n] <<- vr1[b]; vvlp[wf,b,n] <<- vv1[b]  # vl=variable level,
> vvl=variable value
>  level
>   }}
>   tec2NF <<- tec2NF + tecN; unit[n] <<- sumf; misc.unit[n] <<- tecN;
> deviancep[n] <<- dTest
>
>    return()
>  }else
>   {
>    data1 = data; data2 = testdata; s=sv(data1)[[method]]; s1 <- s[1];
> s2=s[2]; levelp <- levelp + 1
>
>    vr1[levelp] <<- s1; vv1[levelp] <<- s2 # vv=variable value
>
>    data=LN(data1,s1,s2); testdata=LN(data2,s1,s2); side <<- c("L")
>     funcp(data,testdata,levelp)
>    data=RN(data1,s1,s2); testdata=RN(data2,s1,s2); side <<- c("R")
>    funcp(data,testdata,levelp)
>   }
>     }
>   funcp(mm,ww,0)
> clp=apply(vlp,3,function(vlp) which(rowSums(vlp)!=0))[1:n];
> for(i in 1:nnodes){for(ii in 1:nc){if(any(vlp[,,i][ii,]!=0)){vlfp[[i]] <-
> vlp[,,i][ii,]; vvlfp[[i]] <- vvlp[,,i][ii,]}}}; vlf1p <-
> do.call('rbind',vlfp); vvlf1p <- do.call('rbind',vvlfp);
> lp=vlf1p; indlp <- colSums(lp) != 0; lp=matrix(lp[,indlp],nrow=nrow(lp));
> llp=vvlf1p; indllp <- colSums(llp) != 0;
> llp=round(matrix(llp[,indllp],nrow=nrow(llp)),digits=4);
> print("pruned tree"); matp <-
> as.data.frame(matrix(paste(lp,"(",llp,")",sep=""),nrow=nrow(lp)));
> deviance <- round(deviancep,digits=2); LorRnode <- nodesidep; class <-
> nodeclasp
> resMatp <- cbind(matp,LorRnode,unit,misc.unit,deviance,class);
> print(resMatp)
> eTrain <- round(mean(eTraining)/avelength*100,digits=2); eTest <-
> round(tec2NF/nrow(ww)*100,digits=2); dev.sum <- sum(deviance);
>  print(cbind(eTrain,eTest,dev.sum))
>  }
>   CTPrundUnprund(iris,150,1,4,5,1,3,10,10)
>
>
>
>
> best regards
>
> Muhammad Azam
>         [[alternative HTML version deleted]]
>
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>
>


-- 

Bert Gunter
Genentech Nonclinical Biostatistics

Internal Contact Info:
Phone: 467-7374
Website:
http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to