!************************************************************************* !* * !* LMBM-CLR - Nonsmooth Optimization based Incremental approach * !* for Clusterwise Linear Regression Software * !* * !* by Napsu Karmitsa 2016 (last modified 15.07.2016). * !* The code is partly based on codes by Adil Bagirov 2015. * !* * !* The software is free for academic teaching and research * !* purposes but I ask you to refer the appropriate references * !* given below, if you use it. * !* * !************************************************************************* !* !* !* Codes included: !* !* lmbmclr.f95 - Mainprogram for clusterwise linear regression !* software (this file). !* parameters.f95 - Parameters. Inludes modules: !* - r_precision - Precision for reals, !* - param - Parameters, !* - exe_time - Execution time. !* initlmbmclr.f95 - initialization of parameters for clusterwise !* linear regression and LMBM. Includes modules: !* - initclr - Initialization of parameters for !* clusterwise linear regeression. !* - initlmbm - Initialization of LMBM. !* lmbmclrmod.f95 - Subroutines for clusterwise linear regression. !* clrfunctionmod.f95 - Computation of function and (sub)gradients values. !* lmbm.f95 - LMBM - limited memory bundle method. !* clrobjfun.f95 - Computation of the function and subgradients values. !* subpro.f95 - subprograms for LMBM. !* !* Makefile - makefile. !* !* !* To use the software modify initlmbmclr.f95 as needed. !* !* !* References: !* !* for LMBM-CLR: !* N. Karmitsa, A. Bagirov, S. Taheri, "Clusterwise Linear Regression using LMBM" !* !* for LMBM: !* N. Haarala, K. Miettinen, M.M. Mäkelä, "Globally Convergent Limited Memory Bundle Method !* for Large-Scale Nonsmooth Optimization", Mathematical Programming, Vol. 109, No. 1, !* pp. 181-205, 2007. DOI 10.1007/s10107-006-0728-2. !* !* M. Haarala, K. Miettinen, M.M. Mäkelä, "New Limited Memory Bundle Method for Large-Scale !* Nonsmooth Optimization", Optimization Methods and Software, Vol. 19, No. 6, pp. 673-692, 2004. !* DOI 10.1080/10556780410001689225. !* !* N. Karmitsa, "Numerical Methods for Large-Scale Nonsmooth Optimization" in Big Data !* Optimization. A. Emrouznejad (eds.), Springer, 2016. !* !* for NSO clusterwise linear regression: !* A. Bagirov, N. Karmitsa, M.M. Mäkelä, "Introduction to nonsmooth optimization: theory, !* practice and software", Springer, 2014. !* !* !* Acknowledgements: !* !* The work was financially supported by the Academy of Finland (Project No. 289500). !* !***************************************************************************************** !* !* * PROGRAM lmbmclr * !* !* Main program for nonsmooth clusterwise linear regression software with LMBM. !* !***************************************************************************************** PROGRAM lmbmclr USE r_precision, ONLY : prec ! Precision for reals. USE param, ONLY : zero, one, large ! Parameters. USE initclr, ONLY : & ! Initialization of parameters. infile, & ! Dataset file. outfile1, & ! Result file with hyperplanes. outfile2, & ! Result file with predictions. outfile3, & ! Result file with training test. maxdim, & ! Maximum number of variables for optimization. maxinit, & ! Maximum number of candidate points of data set. a, & ! Data matrix a(nrecord,nft), from input file. a4, & ! Auxiliary data matrix a4(nrecord,nft). gamma1, & ! Parameter affecting the size of the set of starting point. gamma2, & ! Parameter affecting the size of the set of starting point. gamma3, & ! Parameter affecting the size of the set of starting point. gamma4, & ! Parameter affecting the size of the set of starting point. gam1, & ! Multiplier for gamma1. gam2, & ! Multiplier for gamma2. gam3, & ! Multiplier for gamma3. gam4, & ! Multiplier for gamma4. dminim, & ! toler, & ! tlimit, & ! Time limit, from user. noutcom, & ! Number of the output column, form user. itest, & ! Selection of test set usage: ! 1: no test set, ! 2: one training set and one test set, ! 3: cross-validation. ntrain, & ! Number of points in training set. nfoldmax, & ! Number of cross validations. iscale, & ! Selection of scaling: ! 1: scaling, ! 2: no scaling. nupdate, & ! Number of updates. naux, & ! Number of auxiliary function evaluations. naux2, & ! Number of first auxiliary function evaluations. nfit, & ! Number of function evaluations. nclust, & ! Maximum number of clusters, from user. nft, & ! Number of features in data, from user. nrecord, & ! Number of instances in data, from user. nrecord1, & ! Number of instances used in computions. nrecord2, & ! Number of instances used in computions. nc, & ! Current number of clusters, loops from 1 to nclust. ns, & ! Switch for auxiliary and real clustering problem. nel1, & nob1, & nl, & nlist, & nlisttrain, & init_clrpar, & ! Furher initialization of parameters. def_clrpar ! Default values of clustering parameters. USE clrmod, ONLY : & ! Subprograms for clusterwise linear regression. scaling, & ! ok rescaling, & ! ok distribution, & ! ok step1, & ! ok cleaning, & ! ok checkfinal ! ok USE clrfunctionmod, ONLY : & clrfunc, & ! Function calculus. auxfunc ! Function calculus. USE lmbm_mod, ONLY : optim2 ! LMBM method for optimization. !USE bfgs_mod, ONLY : optim1 ! BFGS method for solving auxiliary problem. USE exe_time, ONLY : getime ! Execution time. IMPLICIT NONE REAL(KIND=prec), DIMENSION(:), allocatable :: & x, & x1, & xbest, & x4, & z, & !z1, & d8, & fv, & fv2, & a1, & xc REAL(KIND=prec), DIMENSION(:,:), allocatable :: & ftr, & ftest, & rmsetr, & rmsetest, & cetr, & cetest REAL(KIND=prec) :: & tol2, & p2,p3,p4, & d7,d9, & f,f2,f3, & fv1, & fbest,fb, & fbest1, & fmin,fmin1, & ff,ff1,rmse1,rmse2,ce1,ce2 REAL :: & time1, & ! Starting time time4, & ! Finishing time timef ! CPU time used INTEGER :: & nrecord3, & nrecord4, & i,j,j1,k, & jpoints, & nfold, & n1,nl1, & ncount, & neltotal, & ngood0, & naux2tot, & nauxtot, & nfittot ! Intrinsic Functions INTRINSIC MAX allocate(a1(nft),x(maxdim),xc(maxinit),x1(maxdim),x4(maxdim),xbest(maxdim),z(maxdim), & d8(nrecord),fv(nrecord),fv2(nrecord)) allocate(ftr(nclust,nft),ftest(nclust,nft),rmsetr(nclust,nft),rmsetest(nclust,nft),cetr(nclust,nft),cetest(nclust,nft)) ! Nämä voisi lopussa vain laittaa sinne missä lasketaan toleria. tol2=1.0E-7_prec*REAL(nrecord,prec) !tol2=1.0E-6_prec*REAL(nrecord,prec) IF (nrecord > 15000) tol2 = 1.0E-4_prec*REAL(nrecord,prec) !! tämä -4 blog2 ja varmaan myös 64 CALL init_clrpar() CALL def_clrpar() dminim=large OPEN(40,file=outfile1) OPEN(41,file=outfile2) OPEN(43,file=outfile3) OPEN(78,file=infile,status='old',form='formatted') WRITE(40, *) 'Optimization with LMBM.' WRITE(41, *) 'Optimization with LMBM.' WRITE(43, *) 'Optimization with LMBM.' CALL getime(time1) DO i=1,nrecord READ(78,*) (a(i,k),k=1,nft) END DO WRITE(40,*) 'Number of records: ',nrecord,' Number of features: ',nft WRITE(40,*) !================================================================ IF (noutcom < nft) THEN DO i=1,nrecord j1=0 DO j=1,nft IF (j /= noutcom) then j1=j1+1 a1(j1)=a(i,j) END IF END DO a1(nft)=a(i,noutcom) DO j=1,nft a(i,j)=a1(j) END DO END DO END IF !================================================================= ! Division to training and test sets (not randomly) !================================================================= outerloop: DO nfold = 1,nfoldmax IF (itest <= 2) nrecord2 = nrecord - ntrain IF (itest == 3) THEN nrecord2 = nrecord/nfoldmax nrecord3=(nfold-1)*nrecord2+1 nrecord4=nfold*nrecord2 nl=0 DO j=nrecord3,nrecord4 nl=nl+1 nlist(j-nrecord3+1)=j END DO nl1=0 innerloop1: DO i=1,nrecord DO j=1,nl IF (i == nlist(j)) CYCLE innerloop1 END DO nl1 = nl1+1 DO k=1,nft a4(nl1,k)=a(i,k) END DO nlisttrain(nl1)=i END DO innerloop1 ELSE IF (itest == 1) THEN nl=0 DO k=1,nft DO i=1,nrecord a4(i,k)=a(i,k) END DO END DO DO i=1,nrecord nlisttrain(i)=i END DO !DO i=1,nrecord ! DO k=1,nft ! a4(i,k)=a(i,k) ! END DO ! nlisttrain(i)=i !END DO ELSE IF (itest == 2) THEN DO i=1,ntrain DO k=1,nft a4(i,k)=a(i,k) END DO nlisttrain(i)=i END DO nl=nrecord-ntrain DO i=1,nl nlist(i)=i+ntrain END DO END IF nrecord2=nl nrecord1=nrecord-nrecord2 !================================================ IF (iscale == 1) CALL scaling !=============================================== p2 = zero p3 = zero p4 = zero ncount = 0 nfittot=0 nauxtot=0 naux2tot=0 innerloop2: DO nc=1,nclust naux = 0 naux2 = 0 nfit = 0 neltotal = 0 PRINT 3,nc 3 FORMAT(' The number of hyperplanes:',i5) IF (nc > 1) THEN gamma1=gam1*gamma1 gamma2=gam2*gamma2 gamma3=gam3*gamma3 gamma4=gam4*gamma4 IF (gamma1 > one) gamma1=one IF (gamma2 < 1.005) gamma2=1.005_prec IF (gamma3 < 1.005_prec) gamma3=1.005_prec IF (gamma4 > one) gamma4=one CALL step1(x,xc,ngood0) fmin=large DO i=1,ngood0 DO j=1,nft z(j)=xc(j+(i-1)*nft) END DO d7=large d9=zero d8(:)=zero !DO j=1,nft-1 ! DO k=1,nrecord1 ! d8(k)=d8(k)+(a4(k,j)-z(j))**2 ! END DO !END DO !DO k=1,nrecord1 ! IF (d8(k) <= d7) d7=d8(k) ! IF (d8(k) > d9) d9=d8(k) !END DO DO k=1,nrecord1 d8(k)=zero DO j=1,nft-1 d8(k)=d8(k)+(a4(k,j)-z(j))**2 END DO IF (d8(k) <= d7) d7=d8(k) IF (d8(k) > d9) d9=d8(k) END DO d7=d7+gamma4*(d9-d7) nel1=0 DO k=1,nrecord1 f2=z(nft) DO j=1,nft-1 f2=f2+a4(k,j)*z(j) END DO f3=(a4(k,nft)-f2)**2 IF (f3 < dminim(k)) THEN IF (d8(k) <= d7) THEN nel1=nel1+1 nob1(nel1)=k END IF END IF END DO IF (nel1 > 0) THEN !ns=1 nupdate=0 ! Solving auxiliaty problem ns=3 CALL optim2(z,xc((i-1)*nft+1:),f) CALL auxfunc(xc((i-1)*nft+1:),f) !CALL optim2(z,z1,f) !CALL auxfunc(z1,f) neltotal=neltotal+nel1*nupdate+nrecord1 ncount=ncount+1 !tarvitaanko z1 jonnekin? !DO j=1,nft ! xc(j+(i-1)*nft)=z1(j) !END DO ELSE ! nel1 == 0 f=large END IF fv2(i) = f IF (f < fmin) fmin=f END DO fmin1=gamma2*fmin jpoints=0 DO i=1,ngood0 fv1=fv2(i) IF (fv1 <=fmin1) THEN jpoints=jpoints+1 DO j=1,nft xc(j+(jpoints-1)*nft)=xc(j+(i-1)*nft) END DO END IF END DO CALL cleaning(jpoints,xc) ngood0=jpoints fbest=large DO i=1,ngood0 ns=1 DO j=1,nft z(j)=xc(j+(i-1)*nft) END DO CALL optim2(z,xc((i-1)*nft+1:),f) !CALL optim2(z,z1,f) ncount=ncount+1 !tarvitaanko z1? !DO j=1,nft ! xc(j+(i-1)*nft)=z1(j) !END DO fv(i)=f IF (f < fbest) fbest=f END DO fbest1=gamma3*fbest jpoints=0 DO i=1,ngood0 fv1=fv(i) IF (fv1 <= fbest1) THEN jpoints=jpoints+1 DO j=1,nft xc(j+(jpoints-1)*nft)=xc(j+(i-1)*nft) END DO END IF END DO CALL cleaning(jpoints,xc) ngood0=jpoints fb=large DO i=1,ngood0 ns=2 DO j=1,nft x(j+(nc-1)*nft)=xc(j+(i-1)*nft) END DO CALL optim2(x,x1,f) ncount=ncount+nc IF (f < fb) THEN fb=f DO k=1,nc DO j=1,nft xbest(j+(k-1)*nft)=x1(j+(k-1)*nft) END DO END DO END IF END DO DO k=1,nc DO j=1,nft x(j+(k-1)*nft)=xbest(j+(k-1)*nft) END DO END DO ELSE ! nc == 1 DO j=1,nft z(j)=one END DO nel1=nrecord1 DO k=1,nrecord1 nob1(k)=k END DO nupdate=0 ns=3 CALL optim2(z,x,f) CALL clrfunc(x,f) !CALL optim2(z,z1,f) !CALL clrfunc(z1,f) ncount=ncount+1 !nupdate=0, joten ... neltotalia ei ole l1 koodissa !nupdate muuttuu kun lasketaan tuota arvoa neltotal = nel1*nupdate + nrecord1 !neltotal = nrecord1 ! tarvitaanko z1 johonkin? Jos ei, niin yllä voisi suoraan käyttää x ja tätä ei tarvita. !DO j=1,nft ! x(j)=z1(j) !END DO END IF CALL distribution(x,f) ! tämän suuruutta voi miettiä, pääseekö ikinä f vaikuttamaan? nrecord on aika iso isoilla tehtävillä toler=MAX(tol2,tol2*f/REAL(nrecord1,prec)) !toler=MAX(tol2,MIN(tol2*f/REAL(nrecord1,prec),1000.0_prec)) IF (nrecord > 50000) toler = MAX(tol2,100.0_prec*tol2*f/REAL(nrecord1,prec)) ! tämä blog2, 65 joko 100 tai 10 !IF (nrecord > 15000) toler = MAX(tol2,MIN(100.0_prec*tol2*f/REAL(nrecord1,prec),10.0_prec)) ! PRINT*,toler, 10.0_prec*tol2*f/REAL(nrecord1,prec) !IF (nrecord1 < 200) toler=1.0E-02_prec !IF (nrecord1 < 200) toler=1.0E-06_prec !IF (nc == 1) toler=1.0E-06_prec*f ! IF (nc == 1) toler=1.0E-07_prec*f IF (iscale == 1) THEN call rescaling(x,x4) ELSE DO k=1,nc DO j=1,nft x4(j+(k-1)*nft)=x(j+(k-1)*nft) END DO END DO END IF CALL checkfinal(x4,ff,ff1,rmse1,rmse2,ce1,ce2) ftr(nc,nfold)=ff ftest(nc,nfold)=ff1 rmsetr(nc,nfold)=rmse1 rmsetest(nc,nfold)=rmse2 cetr(nc,nfold)=ce1 cetest(nc,nfold)=ce2 naux2tot=naux2tot+naux2 nauxtot=nauxtot+naux nfittot=nfittot+nfit WRITE(40,*) WRITE(40,573) nc 573 FORMAT('No of hyperplanes:',I4) WRITE(40,*) WRITE(41,*) WRITE(41,*) DO k=1,nc WRITE(41,722) (x4(j+(k-1)*nft),j=1,nft) END DO 722 FORMAT(20f12.5) WRITE(40,543) ff 543 FORMAT('Fit function value:',f24.8) WRITE(40,*) WRITE(40,541) ncount 541 FORMAT('The number of linear regression problems solved:',i10) WRITE(40,542) naux2tot,nauxtot,nfittot 542 FORMAT('The numbers of evaluations:',i10,i10,i10) CALL getime(time4) timef=time4-time1 WRITE(40,*) WRITE(40,141) timef PRINT*,ff,timef WRITE(40,*) write(40,*) WRITE(40,*) p2=p2+REAL(neltotal,prec) p3=p3+REAL(naux*nrecord1,prec) IF(nc == 1) THEN p4=p4+REAL(neltotal,prec) ELSE p4=p4+REAL(nfit*nrecord*nc,prec) END IF IF (timef > tlimit) EXIT outerloop END DO innerloop2 141 FORMAT('CPU time:',f12.3) END DO outerloop 210 CONTINUE WRITE(43,803) 803 FORMAT('Fit function values on training set:') WRITE(43,*) DO i=1,nclust WRITE(43,332) i WRITE(43,331) (ftr(i,j),j=1,nfoldmax) END DO IF (itest >= 2) THEN WRITE(43,804) 804 FORMAT('Fit function values on test set:') WRITE(43,*) DO i=1,nclust WRITE(43,332) i WRITE(43,331) (ftest(i,j),j=1,nfoldmax) END DO END IF 331 FORMAT(5f28.6) 332 FORMAT(i6) WRITE(43,*) WRITE(43,802) 802 FORMAT('RMSE and CE coefficients for training set:') DO i=1,nclust WRITE(43,231) i WRITE(43,230) (rmsetr(i,n1),n1=1,nfoldmax) WRITE(43,230) (cetr(i,n1),n1=1,nfoldmax) WRITE(43,*) END DO IF (itest > 1) THEN WRITE(43,*) WRITE(43,*) WRITE(43,801) 801 FORMAT('RMSE and CE coefficients for test set:') DO i=1,nclust WRITE(43,231) i WRITE(43,230) (rmsetest(i,n1),n1=1,nfoldmax) WRITE(43,230) (cetest(i,n1),n1=1,nfoldmax) WRITE(43,*) END DO 231 FORMAT(i10) 230 FORMAT(10f24.8) END IF CLOSE(40) CLOSE(41) CLOSE(43) CLOSE(78) deallocate(a1,x,x1,x4,xbest,z,d8,fv,fv2) !deallocate(a1,x,x1,x4,xbest,z,z1,d8,fv,fv2) deallocate(ftr,ftest,rmsetr,rmsetest,cetr,cetest) STOP END PROGRAM lmbmclr