#pragma rtGlobals=3		// Use modern global access method.
#pragma ModuleName= GeneticOptimisation_CL

#pragma version = 1.1



///GeneticOptimisation is a IGOR PRO procedure for fitting data: it minimizes a cost (chi2) function using
// a genetic algorithm known as "differential evolution" (DE)

//some references for the DE algorithm and its applications:
//[1] R. Storn and K. Price, Differential evolution – a simple and efficient heuristic for global optimization over continuous spaces,
//  Journal of global optimization 11, 341 (1997).  (original algorithm by Storn&Price)
//[2] S. Das and P. N. Suganthan, Differential evolution: A survey of the state-of-the-art, 
//    IEEE transactions on evolutionary computation 15, 4 (2010).  (Review of different strategies and comparison with other methods.)
//[3] M. Wormington, C. Panaccione, K. M. Matney, and D. K.Bowen, 
// Characterization of structures from x-ray scattering data using genetic algorithms, 
//Philosophical Transactions of the Royal Society of London, Series A 357, 2827 (1999).    (applications in x-ray studies)
// [4] https://en.wikipedia.org/wiki/Differential_evolution


//original version (GeneticOptimisation.ipf) written by Andrew Nelson
//Copyright (C) 2006 Andrew Nelson and Australian Nuclear Science and Technology Organisation


//modified by Pawel Wzietek
// (Copyright 2024 Pawel Wzietek and Université Paris-Saclay)
// mainly to adapt it for parallel processing (GPU) :
// - the user-supplied function (func_chi2array) receives the whole population matrix at each iteration
// - can use (optionally) the OpenCL XOP by Peter Dedecker (https://github.com/pdedecker/IgorCL, GPL license)
// to generate (faster) the matrix of new trial vectors at each iteration using GPU code.
// OpenCL MWC64X random number generator from https://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html
// (but did not succeed to contact the author for license info)
// also many other modifications like :
// -  bfrac parameter (controlling DE strategy)
// -  xtol criterium, variable update rate etc.



//----------------------------------------------
//exported (non static) functions: 


//main function: runs the fit
//Function GEN_optimise_CL(f_chi2array, f_updmodel, coefs, limits, [holdwave, refwave, popmul,k_m,recomb,bfrac, iters, updrate_ct, updrate_m ,quiet, chi2tol,xtol,  pldev,maxCLthreads])


//mandatory parameters:
//f_chi2array, f_updmodel: functions to be supplied (see templates) 
//coefs :  initial coefficient wave, will receive the best solution (in/out parameter)
//limits  : two column matrix defining the bounds for parameter variation 
// limits[parameter n°][0] is the lower bound, limits[parameter n°][1] the upper bound

//optional parameters:

//holdwave: same length as coefs, set 0 if coefficient varies (default), 1 if held at the initial value

// refwave : must be supplied if  GEN_func_chi2array or GEN_func_updmodel use it (see templates below)

//bfrac parameter (between 0 and 1) : fraction of best vectors from which the "base vector" ([4]) will be chosen
//e.g.  bfrac=1 (default) means from all vectors   (strategy named "DE/rand/1" in [2])
//and  bfrac=0 means it is always the best vector   (strategy named "DE/best/1" in [2], it gives faster convergence 
// at the expense of genetic diversity -> more chances to miss the best solution)
// bfrac<1 needs sorting of all vectors at each step

//popmul (default=100):  population multiplier: the population size = popmul x n° of coefficients 

//k_m (default=0.7) : mutation constant (or "differential weight", "F" in [4]) 

//recomb (default=0.5) : recombination constant (or "crossover probability", "CR" in [4])

//iters (default=500) : first stopping criterion: max number of iterations 
//  chi2tol (default=0.005) : second stopping criterion : stop if the ratio stddev/avg of chi2 falls below. Set to zero to disable. 
//  xtol (default=0.02) : third stopping criterion : stop if maximum euclidean distance in population falls below.   Set to zero to disable.
//                      The distance is based on normalised coordinates (0-1 between lower and upper bounds).    
//updrate_ct, updrate_m (default=1) : frequencies of updating the color table and the call of the user update function
//quiet (default=0) : set to 1 for quiet mode (don't print results) 

//  pldev (default=-1) : if non-negative the generation of trial vectors will run on GPU, pldev defines the platform (bits2,3)
// and device (bits 0,1), i.e. pldev=4*platform n° + device (e.g. set pldev=4 for device 0 on platform 1); numbering according to the IgorCL XOP (IgorCLInfo)  

// maxCLthreads (default=3000) : if pldev>=0, will define the max. number of threads to use (set approx. to the number of cores for optimum performance).


// other functions:
//function GEN_clear()  //discards some of internal data (may be used before saving the pxp file to disk to save space)

//functions to retrieve some other values relative to the last fit
//function GEN_chi2best()  //returns the value of the cost function for the last best vector
 //function GEN_chi2avg()  //returns the average of the cost function for the last population
//function GEN_chi2dev()  //returns the std.dev. of the cost function for the last population
//function GEN_maxdist()  //returns the maximum euclidean distance for the last population
//function GEN_coefdev(i)  //returns the std.dev. of the coef n°i for the last population (error estimation)
//								//NB. will return NaN after GEN_Clear was executed (population matrix discarded)	
//function GEN_coefcov(i,j) // return the covariance matrix element i,j calculated on the last population
//function GEN_coefcorr(i,j)  //return the correlation coefficient (cov(i,j)/(sdev(i)*sdev(j))

//exported function templates: GEN_func_chi2array, GEN_func_updmodel

//Function GEN_func_chi2array(popmatrix, chi2array, [refwave])
//	wave popmatrix //input: population matrix (coef vectors) : popmatrix[parameter n°][vector n°]
//	wave chi2array//output: array of chi2 values
// refwave: optional Reference Wave if you want to pass other waves without need for runtime lookup)
//	
//End
//Function GEN_func_updmodel(coefs, [refwave])  //will be called whenever the best vector changes 
//	Wave coefs  
// refwave: optional Reference Wave if you want to pass other waves without need for runtime lookup)  
//	
//End




//-------------------------------------------------------------
// Conditional compilation switch: IGOR_CL is defined only when Exists("IgorCL")
// is non-zero at compile time. The "#ifdef IGOR_CL" sections further down
// (OpenCL-based trial vector generation) are compiled only in that case.
#if Exists("IgorCL")
 #define IGOR_CL        //OpenCL XOP available 
#endif

// Full path of the data folder that receives all GEN_* working waves and
// result variables of this module.
static strconstant GEN_DF="root:GEN_optimise"

// State of one optimisation run. A single instance is created in
// GEN_optimise_CL and passed by reference to all static helpers of this file.
static Structure GEN_optimisation
Wave coefs		//user coefficient wave: initial guess on input, receives the best solution
Wave holdwave		//which coefficients are held constant (0: varies, 1: held)
Wave limits			//limits on parameters: limits[n][0] lower bound, limits[n][1] upper bound

wave nhidxwave         //wave containing indices of non-hold coefs (to accelerate trialvector)

Variable popmul  //total population is popmul*number of parameters
Variable k_m            //mutation constant (weight of the difference vector)
Variable recombination  //larger recombination constant means greater genetic diversity
variable bfrac         //fraction of best vectors ("b vector") used to make new trial vectors

//external (user supplied) functions
Funcref GEN_func_chi2array f_chi2array
Funcref GEN_func_updmodel f_updmodel


Wave population_matrix  //population_matrix[parameter][vector]
Wave chi2array          //cost function value of each population vector

wave/wave refwave       //optional user Reference Wave forwarded to the user functions
variable userefwave     //non-zero if refwave was supplied and must be forwarded

variable idx_best       //index of current best element in chi2array

variable max_generations  //first stopping criterion: max number of iterations
variable generations_done //written by optimiseloop
variable aborted	      //set to 1 when the fit was interrupted by an abort


Variable chi2tol       //convergence criterion in chi2 (ratio stddev/avg), 0 disables
variable xtol               //convergence criterion in x  (if xtol!=0)

String callfolder       //data folder that was current when the fit was started
variable quiet //don't print stuff in history area

wave chi2trend   //history of best chi2
wave newbestvsgen    //per generation flag (1: best vector improved), for additional convergence criterion
wave colourtable     //integer image of the normalised population, displayed during the fit
variable updrate_ct  //freq of update of colourtable
variable updrate_m  //freq of update of model

variable ntrials  //number of chi2 evaluations done so far
variable timestart  //datetime at the start of the fit

variable tchi       //total time (s) spent in the user chi2 function  



//
variable pldev //for make_trials : -1 for Igor, otherwise  platform/device  for CL: bits 0-1 device, bits 2-3 platform
		           	//so pldev=4*platform+device
		           	
variable maxCLthreads   //set to the number of GPU cores for best performance, must be less than max allowed gsze_0 for your gpu 		           	
							 //default is 3000 	
wave GEN_CompiledBinary  //OpenCL binary

//auxiliary waves to speed up some calculations
wave limvec         //width of the allowed range of each parameter: abs(upper-lower)


Endstructure
//***************************************************



//exported functions
Function GEN_clear()
	// Frees the largest working arrays of the last fit (population matrix,
	// chi2 array and colour table), e.g. before saving the experiment.
	// The "evolve" window is closed first.

	DoWindow/K evolve

	KillWaves/Z $fullname("GEN_populationmatrix")
	KillWaves/Z $fullname("GEN_chi2array")
	KillWaves/Z $fullname("GEN_colourtable")
End

Function GEN_chi2best()
	// Returns the stored global "chi2best" from the GEN data folder.
	Variable stored = getvaringen("chi2best")
	return stored
End

Function GEN_chi2avg()
	// Returns the stored global "chi2avg" from the GEN data folder.
	Variable stored = getvaringen("chi2avg")
	return stored
End

Function GEN_chi2dev()
	// Returns the stored global "chi2dev" from the GEN data folder.
	Variable stored = getvaringen("chi2dev")
	return stored
End

Function GEN_maxdist()
	// Returns the stored global "maxdist" from the GEN data folder.
	Variable stored = getvaringen("maxdist")
	return stored
End



//external function templates:
Function GEN_func_chi2array(popmatrix, chi2array, [refwave])
	// Prototype (FUNCREF template) of the user cost function; the body is
	// intentionally empty.
	//   popmatrix : input, matrix of coefficient vectors, popmatrix[parameter n°][vector n°]
	//   chi2array : output, one chi2 value per vector
	//   refwave   : optional Reference Wave to pass other waves to the user function
	Wave popmatrix
	Wave chi2array
	Wave/WAVE refwave
End
Function GEN_func_updmodel(coefs, [refwave])
	// Prototype (FUNCREF template) of the user update function, called when
	// the best vector has changed and at the end of the fit; the body is
	// intentionally empty.
	//   coefs   : current best coefficient vector
	//   refwave : optional Reference Wave to pass other waves to the user function
	Wave coefs
	Wave/WAVE refwave
End





//*********************************
// calling user functions
static Function call_f_chi2array(gen, popmatrix, chi2array)
	// Calls the user chi2 function on a whole population matrix and adds the
	// elapsed time (in seconds) to gen.tchi. The refwave is forwarded only
	// when gen.userefwave is set.
	Struct GEN_optimisation &gen
	Wave popmatrix
	Wave chi2array

	Variable t_start = stopmstimer(-2)   // microsecond counter

	if (!gen.userefwave)
		gen.f_chi2array(popmatrix, chi2array)
	else
		gen.f_chi2array(popmatrix, chi2array, refwave=gen.refwave)
	endif

	Variable t_stop = stopmstimer(-2)
	gen.tchi += (t_stop-t_start)*1e-6
End

static Function call_f_updmodel(gen, coefs)
	// Calls the user update function with the given coefficient vector.
	// The refwave is forwarded only when gen.userefwave is set.
	Struct GEN_optimisation &gen
	Wave coefs

	if (!gen.userefwave)
		gen.f_updmodel(coefs)
	else
		gen.f_updmodel(coefs, refwave=gen.refwave)
	endif
End


//******************************
//returns the std.dev. (variance estimator) of the coef n°i for the last population (error estimation)
Function GEN_coefdev(i)
	// Returns the standard deviation (WaveStats V_sdev) of coefficient n°i
	// over the last population (error estimation).
	// Returns NaN if the population matrix does not exist (e.g. after
	// GEN_clear) or if i is not a valid row index (negative, too large or
	// not a finite number).
	Variable i

	Wave/Z popmatrix = getwaveingen("GEN_populationmatrix")

	if (!WaveExists(popmatrix))
		return NaN
	endif

	// reject NaN/Inf, negative and out-of-range indices
	if (numtype(i) != 0 || i < 0 || i > dimsize(popmatrix, 0)-1)
		return NaN
	endif

	WaveStats/Q/RMD=[i,i][] popmatrix
	return V_sdev
End

//calculate and return the covariance matrix element i,j
Function GEN_coefcov(i,j)
	// Returns the element (i,j) of the covariance matrix of the coefficients,
	// calculated on the last population.
	// The sum of products is normalised by (N-1), i.e. the same normalisation
	// as WaveStats V_sdev, so that GEN_coefcov(i,i) equals GEN_coefdev(i)^2.
	// Returns NaN if the population matrix does not exist, if an index is
	// invalid, or if fewer than 2 valid points are available.
	Variable i,j

	Wave/Z popmatrix = getwaveingen("GEN_populationmatrix")

	if (!WaveExists(popmatrix))
		return NaN
	endif

	Variable lastrow = dimsize(popmatrix, 0)-1

	// reject NaN/Inf, negative and out-of-range indices
	if (numtype(i) != 0 || numtype(j) != 0)
		return NaN
	endif
	if (i < 0 || j < 0 || i > lastrow || j > lastrow)
		return NaN
	endif

	Make/FREE/N=(dimsize(popmatrix,1)) wcorr

	WaveStats/Q/RMD=[i,i][] popmatrix
	Variable avi = V_avg
	WaveStats/Q/RMD=[j,j][] popmatrix
	Variable avj = V_avg

	wcorr = (popmatrix[i][p]-avi)*(popmatrix[j][p]-avj)

	WaveStats/Q wcorr

	if (V_npnts < 2)
		return NaN
	endif

	// V_avg*V_npnts is the sum of the valid products (NaNs are skipped by WaveStats)
	return V_avg*V_npnts/(V_npnts-1)
End

//calculate and return the correlation coefficient 
Function GEN_coefcorr(i,j)
	// Returns the correlation coefficient cov(i,j)/(sdev(i)*sdev(j)) of the
	// coefficients i and j, calculated on the last population.
	// The covariance is normalised by (N-1) like WaveStats V_sdev, so that
	// GEN_coefcorr(i,i) is 1 (it was (N-1)/N with the 1/N normalisation).
	// Returns NaN if the population matrix does not exist, if an index is
	// invalid, if fewer than 2 valid points are available, or if one of the
	// coefficients does not vary (0/0).
	Variable i,j

	Wave/Z popmatrix = getwaveingen("GEN_populationmatrix")

	if (!WaveExists(popmatrix))
		return NaN
	endif

	Variable lastrow = dimsize(popmatrix, 0)-1

	// reject NaN/Inf, negative and out-of-range indices
	if (numtype(i) != 0 || numtype(j) != 0)
		return NaN
	endif
	if (i < 0 || j < 0 || i > lastrow || j > lastrow)
		return NaN
	endif

	Make/FREE/N=(dimsize(popmatrix,1)) wcorr

	WaveStats/Q/RMD=[i,i][] popmatrix
	Variable avi = V_avg
	Variable sdi = V_sdev
	WaveStats/Q/RMD=[j,j][] popmatrix
	Variable avj = V_avg
	Variable sdj = V_sdev

	wcorr = (popmatrix[i][p]-avi)*(popmatrix[j][p]-avj)

	WaveStats/Q wcorr

	if (V_npnts < 2)
		return NaN
	endif

	// V_avg*V_npnts is the sum of the valid products (NaNs are skipped by WaveStats)
	Variable cov = V_avg*V_npnts/(V_npnts-1)

	return cov/(sdi*sdj)
End








//**************  main function
Function GEN_optimise_CL(f_chi2array, f_updmodel, coefs, limits, [holdwave, refwave, popmul,k_m,recomb,bfrac, iters, updrate_ct, updrate_m ,quiet, chi2tol,xtol,  pldev,maxCLthreads])
	// Main entry point: minimises the user cost function by differential evolution.
	//
	// Mandatory parameters:
	//   f_chi2array, f_updmodel : user functions (see the templates GEN_func_chi2array, GEN_func_updmodel)
	//   coefs  : initial guess; receives the best solution (in/out)
	//   limits : 2 column wave, limits[n][0] lower and limits[n][1] upper bound of parameter n
	// Optional parameters (defaults are set in setdefaultparams unless stated):
	//   holdwave : same length as coefs, non-zero for coefficients held at their initial value (default: none held)
	//   refwave  : Reference Wave forwarded to the user functions
	//   popmul, k_m, recomb, bfrac, iters, chi2tol, xtol : parameters of the algorithm and stopping criteria
	//   updrate_ct, updrate_m : update rates of the colour table and of the user model; a value <=0 means once per "iters" generations
	//   quiet    : non-zero to suppress printing in the history area (default 0)
	//   pldev, maxCLthreads : only used if the IgorCL XOP is available; pldev=-1 (default) selects the Igor-only code
	//
	// Aborts with a message if the arguments are inconsistent. If the fit is
	// aborted while running, the best vector found so far is still returned
	// in coefs and the abort is re-raised.

	Funcref GEN_func_chi2array f_chi2array
	Funcref GEN_func_updmodel f_updmodel
	Wave coefs  //initial guess
	wave holdwave
	wave/wave refwave
	wave limits    //mandatory
	variable popmul,k_m,recomb,iters,chi2tol,quiet,xtol,bfrac
	variable updrate_ct, updrate_m
	variable pldev //-1 for igor-only code (default), otherwise 4*platform+device for IgorCL
	variable maxCLthreads 	//must be less than max allowed gsze_0 for your gpu, default is 3000
	                        //you may set it to the number of GPU cores (or slightly less) for best performance

	//-------

	Struct GEN_optimisation gen

	gen.ntrials=0
	gen.aborted=0

	//where are you calling the function from?
	//(in case we need to change it, but now all gen waves are handled by refs only, no need to change folder)
	gen.callfolder=getdatafolder(1)

	Newdatafolder/o $GEN_DF  //all temporary waves go here

	//store funcrefs
	Funcref GEN_func_chi2array gen.f_chi2array=f_chi2array
	Funcref GEN_func_updmodel gen.f_updmodel=f_updmodel

	//check and store other arguments
	wave gen.coefs=coefs

	//quiet mode: any non-zero value is stored as 1
	if(ParamIsDefault(quiet))
		gen.quiet=0
	else
		gen.quiet=(quiet!=0)
	endif

	//pldev and maxclthreads only considered when IgorCL XOP is available
#ifdef IGOR_CL
	if(ParamIsDefault(maxCLthreads))
		gen.maxCLthreads=3000
	else
		gen.maxCLthreads=maxCLthreads
	endif

	if(ParamIsDefault(pldev))
		gen.pldev=-1
	else
		gen.pldev=pldev  //user setting
	endif
#else
	gen.pldev=-1
#endif

	//make the holdwave into a GEN_holdwave or generate a default zero holdwave
	make/o/n=(numpnts(coefs)) $fullname("GEN_holdwave")
	Wave gen.holdwave=$(fullname("GEN_holdwave"))

	if(! ParamisDefault(holdwave))
		//check the holdwave
		if(dimsize(holdwave,0)!=dimsize(coefs,0))
			abort "holdwave needs to be same length as coefficient wave"
		endif
		gen.holdwave = holdwave[p]!=0
	else
		gen.holdwave = 0
	endif

	//extract indices of variable coefs
	make/I/o/n=0 $fullname("GEN_nhidxwave")
	Wave gen.nhidxwave=$(fullname("GEN_nhidxwave"))

	//NB: the internal (normalised) hold wave must be used here, not the
	//optional parameter "holdwave" which is a null reference if it was not supplied
	Wave hw=gen.holdwave
	Extract/o/indx   hw,  gen.nhidxwave,  hw==0

	if(numpnts(gen.nhidxwave)==0)
		abort "all parameters are fixed : nothing to do !"
	endif

	//initial setup, e.g. numgenerations, mutation constant, etc.
	setdefaultparams(gen)

	if(! ParamisDefault(popmul))
		gen.popmul = popmul
	endif

	if(! ParamisDefault(k_m))
		gen.k_m = k_m
	endif

	if(! ParamisDefault(recomb))
		gen.recombination = recomb
	endif

	if(! ParamisDefault(iters))
		gen.max_generations = iters
	endif

	//update rates: a value <=0 means "once per max_generations"
	if(! ParamisDefault(updrate_ct))
		if(updrate_ct>0)
			gen.updrate_ct = updrate_ct
		else
			gen.updrate_ct = gen.max_generations
		endif
	endif

	if(! ParamisDefault(updrate_m))
		if(updrate_m>0)
			gen.updrate_m = updrate_m
		else
			gen.updrate_m = gen.max_generations
		endif
	endif

	if(! ParamisDefault(chi2tol))
		gen.chi2tol = chi2tol
	endif

	if(! ParamIsDefault(xtol))
		gen.xtol=xtol
	endif

	if(! ParamIsDefault(bfrac))
		gen.bfrac=max(0,min(bfrac, 1))   //clip to 0..1
	endif

	if(! ParamIsDefault(refwave))
		if(WaveType(refwave, 1)==4)
			wave/wave gen.refwave=refwave
			gen.userefwave=1
		else  //in principle should not happen:
			abort "The supplied refwave is not a Reference Wave"
		endif
	else
		gen.userefwave=0  //if absent, refwave will not be used
	endif

	if(dimsize(limits,1)!=2)
		abort "user supplied limit wave should be 2 column"
	endif

	if(dimsize(limits,0) != dimsize(coefs,0))
		abort "user supplied limit wave should be the same length as the parameter wave"
	endif

	//limits ok, copy
	duplicate/o limits, $(GEN_DF+":GEN_limits")
	Wave gen.limits=$(GEN_DF+":GEN_limits")  //bind

	//make sure that the initial guess is between the limits
	variable ok=checkinitiallimits(gen,gen.coefs)

	if(! ok)
		ABORT "error in limits or initial values"
	endif

	//-----------------------------------------------------------------
	//parameters checked, start fitting
	//-----------------------------------------------------------------

	//set up the waves for the genetic optimisation
	makeinit_waves(gen)  //make and bind

	if(!gen.quiet)
		print "_________________________________\rStarting Genetic Optimisation"
	endif

#ifdef IGOR_CL
	if (gen.pldev >=0)  //init the CL code
		compileCL(gen) //if errors will print message and fallback to the all-igor version
	endif
#endif

	gen.timestart=datetime
	gen.tchi=0

	//make global variables to store result and stats on population
	makevaringen("chi2best")
	makevaringen("chi2dev")
	makevaringen("chi2avg")
	makevaringen("maxdist")

	//make a whole set of guesses based on the parameter limits just created
	init_popmatrix(gen)

	//calculate the chi2 array for this population
	call_f_chi2array(gen, gen.population_matrix, gen.chi2array)

	//make a wave to follow the trend in Chi2 and a wave to store one best per gen
	wave gen.Chi2trend = makeingen(gen, "GEN_chi2trend", 0)  //update when improved, this for first tolerance calc and stop criteria
	wave gen.newbestvsgen = makeingen(gen, "GEN_newbestvsgen", gen.max_generations)  //boolean: 1 if improvement, vs gen, for additional criterion

	//show initial state
	if(strlen(Winlist("evolve",";",""))==0)
		showwindow(gen)
	endif

	//fill the colour table column by column
	//(a single 2D assignment with [p][q] was marked as wrong by the author here)
	variable ii
	for (ii=0; ii<dimsize(gen.population_matrix, 1);ii+=1)
		gen.colourtable[][ii]=256*abs(gen.population_matrix[p][ii]-gen.limits[p][0])/gen.limvec[p]
	endfor

	//make local storage
	duplicate/free   gen.coefs bvector  //make best vector wave

	try				//the user may try to abort the fit, especially if it takes a long time

		// first best:
		Wavestats/q/z/M=1 gen.chi2array
		gen.idx_best=V_minloc  //index of best performing vector from population

		//check if the initial chi2array is valid
		if(V_numNaNs>0)
			print "WARNING:  your chi2 array contains NANs, the fit is likely to fail"
		endif

		bvector=gen.population_matrix[p][gen.idx_best]

		//call user update on current starting bvector
		call_f_updmodel(gen, bvector)

		gen.Chi2trend[0]=gen.chi2array[gen.idx_best]

		if(!gen.quiet)
			print "Initial Chi2: "+num2str(gen.chi2array[gen.idx_best])
		endif

		//now enter the fitting loop to improve it
		optimiseloop(gen)

		returnresults(gen)
	catch		//if the user aborts during the fit then send back the best position so far

		gen.aborted=1

		returnresults(gen)   //will also call user update on last b_vector

		abort   //resume abort (necessary if gen_optimize was called in a batch loop)

	endtry

End

//find maximum normalized euclidean distance from a vector at index idx
static Function maxdistance(gen, idx)
	// Returns the maximum normalised euclidean distance between the
	// population vector at index idx and all vectors of the population.
	// Each coordinate difference is divided by the width of the allowed
	// range of the parameter (gen.limvec).
	// Coordinates that cannot be normalised are ignored (contribute 0):
	//  - coefficients that are NaN in the reference vector,
	//  - parameters with a zero-width range (lower==upper limit). Without
	//    this guard 0/0 gives NaN, the norm becomes NaN, the comparison
	//    below never succeeds and the function returns 0, which makes the
	//    xtol criterion trivially true.
	Struct GEN_optimisation &gen
	Variable idx

	Variable veclen = dimsize(gen.coefs,0)
	Variable nvec = dimsize(gen.population_matrix,1)
	Variable dmax = 0
	Variable dn, i

	Make/free/N=(veclen) idxvec, dv, invlim

	idxvec = gen.population_matrix[p][idx]

	// 1/width, or 0 for zero-width ranges
	invlim = (gen.limvec[p] != 0) ? 1/gen.limvec[p] : 0

	//check if a coef was set to nan
	WaveStats/q/m=1 idxvec
	Variable hasnans = (V_numNaNs != 0)

	for(i=0; i<nvec; i+=1)

		if (!hasnans)
			dv = (idxvec[p]-gen.population_matrix[p][i])*invlim[p]  //normalized euclidean coordinates
		else
			dv = (numtype(idxvec[p])==2) ? 0 : (idxvec[p]-gen.population_matrix[p][i])*invlim[p]
		endif

		dn = norm(dv)

		if (dn > dmax)
			dmax = dn
		endif

	endfor

	return dmax
End

Static Function setdefaultparams(gen)
	// Loads the default values of the tunable parameters into gen.
	Struct GEN_optimisation &gen

	// differential evolution parameters
	gen.popmul = 100           // multiplier: total population is popmul*number of parameters
	gen.k_m = 0.7
	gen.recombination = 0.5
	gen.bfrac = 1

	// stopping criteria
	gen.max_generations = 500
	gen.chi2tol = 0.005
	gen.xtol = 0.02

	// update rates (colour table, user model)
	gen.updrate_ct = 1
	gen.updrate_m = 1
End
//****************************************
Static Function optimiseloop(gen)
	// Runs the generations of the differential evolution.
	//
	// For each generation:
	//  1. a whole matrix of trial vectors is generated (Igor or OpenCL code),
	//  2. the user chi2 function is called once on the trial matrix,
	//  3. every trial vector that is better than its parent replaces it,
	//  4. the best vector, the user model, the colour table and the graph are updated,
	//  5. the stopping criteria are checked.
	//
	// The chi2 matrix is calculated in a single call (so it can be parallelized),
	// therefore the replaced vectors only become effective in the next generation.
	// This is close to the original description (Storn and Price, Journal of
	// Global Optimization, 11, 341, 1997) but differs from the original ipf file
	// where the better vectors are used immediately within the same generation.
	//
	// Returns 1 if chi2 reached exactly 0, otherwise no value is returned.
	// On exit gen.idx_best is the index of the best vector and
	// gen.generations_done holds the generation counter.
	Struct GEN_optimisation &gen

	string tagtext
	variable ii, kk, iu, nvec
	variable prevbestchi2, currentbestchi2
	variable tolok, dchi2, xdist

	Dowindow/F evolve

	nvec=dimsize(gen.population_matrix,1)

	//local storage:
	variable veclen=dimsize(gen.coefs, 0)

	make/free/n=(veclen,nvec) population_matrix_trial

	make/free/n=(nvec) chi2array_trial

	make/I/free/n=(nvec) update_ct_array
	update_ct_array=0

	//to store the order of current chi2 matrix:
	make/free/n=(nvec)  chi2idx
	chi2idx=p  //default numbering (if bfrac=1)

	variable updmflag=0  //flag to update model

	gen.generations_done=0

	//init the flags of "new best" events ONCE: the convergence test below
	//looks at the flags of the last 3 generations, so they must not be
	//cleared at each generation
	gen.newbestvsgen=0

	//---------------------
	for(kk=0;kk<gen.max_generations;kk+=1)			//loop over the generations

		//bfrac<1 needs the ranking of the current vectors
		if(gen.bfrac<0.99)
			makeindex gen.chi2array, chi2idx
		endif

		//generate "raw" trial vectors from current vectors
		if (gen.pldev<0)

			make_trials_Igor(gen, chi2idx, gen.population_matrix, population_matrix_trial)

		else

#ifdef IGOR_CL   //a much faster version can be used if OpenCL XOP available:
			executeCL_maketrials(gen, chi2idx, gen.population_matrix, population_matrix_trial)
#else
			//should not happen (pldev was checked before)
			abort "error: IGOR_CL not defined"
#endif

		endif

		//chi2 on trial matrix: (this is the main/possibly slowest part of the calculation (but can be parallelized)
		call_f_chi2array(gen, population_matrix_trial, chi2array_trial)

		gen.ntrials += nvec

		//remember the best chi2 BEFORE the replacement: the best vector itself
		//may be replaced by its own (better) trial vector, this must also be
		//detected as an improvement
		prevbestchi2=gen.chi2array[gen.idx_best]

		//here we choose the vectors to be replaced in the population
		for(ii=0;ii<nvec;ii+=1)
			//if the trial vector is better than the original then replace it (will only be effective in the next generation).
			if (chi2array_trial[ii]<gen.chi2array[ii])

				gen.population_matrix[][ii]=population_matrix_trial[p][ii]
				gen.chi2array[ii]=chi2array_trial[ii]

				update_ct_array[ii]=1  //mark color table points to update

			endif
		endfor //ii

		//best:
		Wavestats/q/z/M=1 gen.chi2array
		currentbestchi2=V_min

		if(currentbestchi2 < prevbestchi2)
			gen.idx_best=V_minloc  //index of new best performing vector from population

			//mark improvement of bvector in current gen
			gen.newbestvsgen[kk] = 1
			updmflag=1
		endif

		//if new best has changed check if model is to update
		if (updmflag && mod(kk+1, gen.updrate_m)==0)
			// then call user update on the new best vector
			call_f_updmodel(gen, extractbestvector(gen))
			updmflag=0 //reset flag
		endif

		//check if colortable is to update
		if(mod(kk+1, gen.updrate_ct)==0)

			Extract/free/indx update_ct_array, update_idx, update_ct_array==1

			for(iu=0; iu<numpnts(update_idx) ; iu+=1)
				ii=update_idx[iu]
				gen.colourtable[][ii]=256*abs(gen.population_matrix[p][ii]-gen.limits[p][0])/gen.limvec[p]
			endfor

			update_ct_array=0  //reset

		endif

		gen.generations_done=kk

		//end of current generation: check if convergence reached
		if(kk>3)  //min 4 gens to check

			if(gen.chi2trend[numpnts(gen.chi2trend)-1]==0)  //should not happen if chi2 defined correctly

				print "chi2 reached 0 after ",kk, " iterations"
				print ", max distance:", maxdistance(gen, gen.idx_best)

				return 1

			endif

			//tolerance conditions checked only if no improvement over last 3 iterations
			if(gen.newbestvsgen[kk]==0 && gen.newbestvsgen[kk-1]==0 && gen.newbestvsgen[kk-2]==0)

				tolok=0

				if(gen.chi2tol>0)

					wavestats/q gen.chi2array

					dchi2=V_sdev/V_avg    //ratio standard dev/mean characterizes the scattering of chi2 values

					tolok=dchi2<gen.chi2tol
				endif

				//second criterion only checked when first is ok or not used
				if((tolok||gen.chi2tol==0) && gen.xtol>0)

					xdist=maxdistance(gen, gen.idx_best) //last criterion, most expensive to calculate
					tolok = (xdist < gen.xtol)

				endif

				if (tolok)
					break
				endif

			endif

		endif

		//update tag
		tagtext="\Z12Generation: "+num2istr(kk) + " Chi2="+ num2str(gen.chi2array[gen.idx_best])+ " ;    "+num2str(gen.ntrials)+" vectors tried"

		try  //in case window closed
			Tag/W=evolve/A=LT/c/n=text0/f=0/x=0/y=-100/l=0 $(NameOfWave(gen.colourtable)), 0, tagtext
		catch
		endtry

		//add current best chi2 to Chi2trend curve for display
		redimension/n=(numpnts(gen.chi2trend)+1) gen.chi2trend
		gen.chi2trend[numpnts(gen.chi2trend)-1] =  gen.chi2array[gen.idx_best]

		//update the convergence image and Chi2trend curve
		//(same generations as the colour table refresh above)
		if(mod(kk+1, gen.updrate_ct)==0)
			doupdate/W=evolve
		endif
	endfor

	gen.generations_done=kk

	//after all this looping the best vector should be in gen.idx_best

End


//*****************
// make  trial vectors: Igor version
//chi2idx is the indexing wave for chi2 in popmatrix, to select among "bfrac" of best vectors

static Function make_trials_Igor(gen, chi2idx, popmatrix, popmatrix_trial)
	// Igor-only generation of the trial matrix: one trial vector per
	// population vector.
	//   chi2idx         : indexing wave for chi2 in popmatrix (sorted if bfrac<1, sequential otherwise)
	//   popmatrix       : current population, popmatrix[parameter][vector]
	//   popmatrix_trial : output, receives the trial vectors
	//
	// Correspondence with the wikipedia notation Y=a+F*(b-c):
	//   a = base_vec, F = gen.k_m, b = vec_a, c = vec_b
	Struct GEN_optimisation &gen
	Wave chi2idx, popmatrix, popmatrix_trial

	Variable ncoef = dimsize(popmatrix,0)
	Variable npop = dimsize(popmatrix,1)
	Variable iv, ia, ib, irank, ibase

	Make/n=(ncoef)/free cur_vec, base_vec, mutant_vec, trial_vec, vec_a, vec_b

	for(iv=0; iv<npop; iv+=1)

		// two random members for the difference vector
		ia = round(abs(enoise(npop-0.50000001)))
		ib = round(abs(enoise(npop-0.50000001)))

		// base vector: random member among the "bfrac" best ones
		irank = round(abs(enoise(npop*gen.bfrac-0.50000001)))
		ibase = chi2idx[irank]

		vec_a = popmatrix[p][ia]
		vec_b = popmatrix[p][ib]
		base_vec = popmatrix[p][ibase]

		// mutation: a higher k_m introduces more randomness
		mutant_vec = base_vec+gen.k_m*(vec_a-vec_b)

		// recombination of the current vector with the mutant,
		// held positions are skipped through nhidxwave
		cur_vec = popmatrix[p][iv]
		trialvector(gen.recombination, cur_vec, mutant_vec, trial_vec, gen.nhidxwave)

		// keep the trial vector within the limits
		ensureconstraints(trial_vec, gen)

		popmatrix_trial[][iv] = trial_vec[p]

	endfor
End



Static Function trialvector(recomb, p_vector, bprime_vector, trial_vector, nhidx)   //sets trial_vector
	// Builds trial_vector from p_vector and bprime_vector:
	//   - one random non-hold position is always taken from bprime_vector,
	//   - each other non-hold position is taken from bprime_vector when a
	//     random number drawn in 0..1 is <= recomb,
	//   - all remaining positions (incl. held ones) are copied from p_vector.
	// nhidx contains the indices of the non-hold positions.
	Variable recomb
	Wave p_vector, bprime_vector, trial_vector, nhidx

	Variable nfree = dimsize(nhidx,0)
	Variable forced = trunc(abs(enoise(nfree))-0.00001)   //integer between 0 and nfree-1
	Variable k, pos, draw

	trial_vector = p_vector   //start from a copy of the original

	for(k=0; k<nfree; k+=1)

		draw = abs(enoise(1))   //between 0 and 1

		if(!(draw<=recomb || forced==k))
			continue   //keep the value of p_vector
		endif

		pos = nhidx[k]   //next non-hold position
		trial_vector[pos] = bprime_vector[pos]

	endfor
End

Static Function ensureconstraints(trial_vector, gen)
	// Makes sure that the non-hold elements of trial_vector stay within
	// the limits: an element outside its limits is replaced by a new random
	// value drawn between the lower and the upper bound.
	// Held positions are not touched.
	Wave trial_vector
	Struct GEN_optimisation &gen

	Variable k, pos, lo, hi
	Variable nfree = Dimsize(gen.nhidxwave,0)

	for(k=0; k<nfree; k+=1)

		pos = gen.nhidxwave[k]   //next non-hold position
		lo = gen.limits[pos][0]
		hi = gen.limits[pos][1]

		if(trial_vector[pos]<lo || trial_vector[pos]>hi)
			//out of limits: generate another random value
			trial_vector[pos] = (lo+hi)/2+enoise(1)*(hi-lo)/2
		endif

	endfor
End




Static Function checkinitiallimits(gen, vec)
	// Checks, for every non-hold coefficient, that the lower limit is not
	// above the upper limit and that the value in vec lies within the limits.
	// Shows an alert and returns 0 at the first problem found, returns 1
	// if everything is ok.
	Struct GEN_optimisation &gen
	Wave vec

	Variable idx, lo, hi, val
	Variable ncoef = dimsize(vec,0)

	for(idx=0; idx<ncoef; idx+=1)

		if (gen.holdwave[idx]!=0)
			continue   //only test for non hold coefs
		endif

		lo = gen.limits[idx][0]
		hi = gen.limits[idx][1]
		val = vec[idx]

		if(lo>hi)
			doalert 0, "your lower limit n°"+num2str(idx)+" is bigger than your upperlimit"
			return 0
		endif

		if(val<lo || val>hi)
			doalert 0, "your parameter n°"+num2str(idx)+" is outside one of the limits"
			return 0
		endif

	endfor

	return 1
End





//adds GEN folder to wave name
static Function/S fullname(nw)
	// Returns the name nw prefixed with the path of the GEN data folder.
	String nw

	String path = GEN_DF+":"+nw
	return path
End

//makes 1D wave in the GEN folder  then returns wave ref
static Function/WAVE makeingen(gen, nw, len)
	// Makes (or overwrites) a 1D wave named nw with len points in the GEN
	// data folder and returns a reference to it.
	Struct GEN_optimisation &gen
	String nw     //name (without folder)
	Variable len  //number of points

	String path = fullname(nw)   //full name with folder
	Make/O/N=(len) $path

	Wave created = $path
	return created
End

static Function makevaringen(nv)
	// Makes the global variable nv in the GEN data folder and sets it to NaN.
	String nv

	String path = fullname(nv)
	Variable/G $path

	NVAR gv = $path
	gv = NaN
End

static Function setvaringen(nv, val)
	// Stores val in the global variable nv of the GEN data folder.
	String nv
	Variable val

	String path = fullname(nv)
	NVAR gv = $path
	gv = val
End

static Function getvaringen(nv)
	// Returns the value of the global variable nv of the GEN data folder.
	// Returns NaN if the variable does not exist (e.g. when one of the
	// public GEN_chi2xxx() functions is called before any fit was run),
	// instead of raising a runtime error on the missing global.
	String nv

	NVAR/Z gv = $fullname(nv)

	if (!NVAR_Exists(gv))
		return NaN
	endif

	return gv
End

static Function/WAVE getwaveingen(nv)
	// Returns a reference to the wave nv of the GEN data folder.
	// The reference is null if the wave does not exist (/Z lookup), so
	// callers must test it with WaveExists.
	String nv

	String path = fullname(nv)
	Wave/Z found = $path
	return found
End

Static Function makeinit_waves(gen)
	// Makes the main working waves in the GEN data folder and binds them
	// to the corresponding fields of gen:
	//   population_matrix : (n° of coefs) x (popmul * n° of coefs)
	//   chi2array         : one value per population vector
	//   limvec            : width of the allowed range of each parameter
	//   colourtable       : integer wave with the dimensions of the population matrix
	// gen.coefs, gen.popmul and gen.limits must be set before the call.
	Struct GEN_optimisation &gen

	Variable ncoef = numpnts(gen.coefs)

	//population matrix
	String popname = fullname("GEN_populationmatrix")
	Make/O/N=(ncoef, gen.popmul*ncoef) $popname
	Wave gen.population_matrix = $popname   //bind

	Variable npop = dimsize(gen.population_matrix, 1)

	//chi2 array
	Wave gen.chi2array = makeingen(gen, "GEN_chi2array", npop)

	//width of the parameter ranges
	Wave gen.limvec = makeingen(gen, "GEN_limvec", ncoef)
	gen.limvec = abs(gen.limits[p][1]-gen.limits[p][0])

	//table to illustrate the evolution
	String ctname = fullname("GEN_colourtable")
	Make/I/O/N=(ncoef, npop) $ctname
	Wave gen.colourtable = $ctname
End


// Builds the initial population in gen.population_matrix
// (rows = parameters, columns = population vectors).
//  - column 0 is the starting guess gen.coefs, which should already lie
//    between the limits;
//  - for a free parameter (gen.holdwave[row]==0) columns 1.. receive random
//    values between gen.limits[row][0] and gen.limits[row][1];
//  - for any other parameter every column receives gen.coefs[row].
Static Function init_popmatrix(gen)
	Struct GEN_optimisation &gen

	variable row, col			// loop counters
	variable lo, hi, draw		// limits of one parameter and the random value drawn

	variable nvec = Dimsize(gen.population_matrix, 1)	// number of vectors
	variable npar = Dimsize(gen.population_matrix, 0)	// number of parameters

	// first column holds the initial parameters
	gen.population_matrix[][0] = gen.coefs[p]

	// every other column is filled parameter by parameter
	for(row=0; row<npar; row+=1)

		if (gen.holdwave[row]==0)

			for(col=1; col<nvec; col+=1)
				lo = gen.limits[row][0]
				hi = gen.limits[row][1]
				// middle of the interval plus a random offset of at most half its width
				draw = (lo+hi)/2 + abs(lo-hi)*enoise(0.5)
				gen.population_matrix[row][col] = draw
			endfor

		else
			// held parameter: identical in all vectors
			gen.population_matrix[row][] = gen.coefs[row]
		endif

	endfor

End


// Returns population vector number idx (column idx of gen.population_matrix)
// as a free wave. The free wave starts as a duplicate of gen.coefs and is
// then overwritten point by point with the matrix column.
static Function/wave extractvector(gen, idx)
	Struct GEN_optimisation &gen
	variable idx		// column of the population matrix to extract

	duplicate/free gen.coefs, pvec
	pvec = gen.population_matrix[p][idx]

	return pvec
end
	
// Returns the current best population vector, i.e. column gen.idx_best of
// gen.population_matrix, as a free wave.
static Function/wave extractbestvector(gen)
	Struct GEN_optimisation &gen

	// same extraction as for any other column, applied to the best index
	wave best = extractvector(gen, gen.idx_best)

	return best
end


// Wraps up a fit:
//  - copies the best population vector into the user's gen.coefs wave and
//    passes it to call_f_updmodel (defined elsewhere in this file);
//  - recomputes gen.colourtable from the final population;
//  - stores chi2best, chi2dev, chi2avg and maxdist in GEN-folder globals;
//  - unless gen.quiet is set, prints a summary to the history.
Static Function returnresults(gen)
	Struct GEN_optimisation &gen

	// (the current data folder is not changed in this version, so there is
	// nothing to restore: Setdatafolder $gen.callfolder is not needed)

	// best solution -> user coefs wave
	wave best = extractvector(gen, gen.idx_best)
	gen.coefs = best

	call_f_updmodel(gen, best)

	// distance of each parameter from its lower limit, in units of the
	// limit span, times 256
	gen.colourtable = 256*abs(gen.population_matrix[p][q]-gen.limits[p][0])/gen.limvec[p]

	// statistics of the final population
	variable chi2best = gen.chi2array[gen.idx_best]

	wavestats/q gen.chi2array
	variable chi2dev = V_sdev
	variable chi2avg = V_avg

	variable maxdist = maxdistance(gen, gen.idx_best)

	// store in global variables
	setvaringen("chi2best", chi2best)
	setvaringen("chi2dev", chi2dev)
	setvaringen("chi2avg", chi2avg)
	setvaringen("maxdist", maxdist)

	if(!gen.quiet)

		variable elapsed = datetime - gen.timestart
		string timing = "total time: "+num2str(elapsed) + " s (chi2: "+ num2str(gen.tchi)+" s)"

		// why the fit stopped
		if (gen.aborted)
			print "fit aborted after "+num2str(gen.generations_done)+ " generations, "+ timing
		elseif (gen.generations_done>=gen.max_generations)
			print "max n° of iterations reached, ", timing
		else
			print "tolerance reached after "+num2str(gen.generations_done)+ " iterations, " + timing
		endif

		print "normalized chi2 deviation: "+num2str(chi2dev/chi2avg)+ ", max distance: "+num2str(maxdist)

		// throughput
		variable persecond = gen.ntrials/elapsed
		print num2str(gen.ntrials)+ " vectors tried, "+num2str(persecond)+ " per second"

		print "The refined Chi2 value was "+num2str(chi2best)+"\r_________________________________"

	endif

End




//for use with pldev modes:
// Returns bit number `bit` of value (bits numbered from 0 = least
// significant): value is halved with floor() once per bit position to skip,
// then the remainder modulo 2 is returned.
static Function isbitset(value,bit)
	variable value,bit

	variable shifted = value
	variable remaining		// halvings still to do

	for(remaining=bit; remaining>0; remaining-=1)
		shifted = floor(shifted/2)
	endfor

	return mod(shifted,2)
End

//*********************************
// Opens the progress window "evolve":
//  - gen.colourtable is shown as an image (Rainbow colour table, range 0..256)
//    on the left/top axes, with the left axis confined to the upper part
//    (0.52..1) of the window;
//  - gen.chi2trend is appended as a trace on the free axes "chi2" (left) and
//    "generation" (bottom); the chi2 axis occupies the lower part (0.05..0.48).
// The window name is fixed, so every command below addresses it with /w=evolve.
static function showwindow(gen)
	Struct GEN_optimisation &gen

		//Display/K=1/N=evolve 
		//AppendImage/W=evolve  gen.colourtable
		// image of the population colour table in a new window named evolve
		newimage/K=1/N=evolve gen.colourtable
		
		Modifygraph/w=evolve width=320,height=320
		ModifyImage/w=evolve $nameofwave(gen.colourtable) ctab= {0,256,Rainbow,0}
		ModifyGraph/w=evolve mirror(left)=1,mirror(top)=0,minor(top)=0;DelayUpdate
		// keep the image in the upper part of the window
		ModifyGraph/w=evolve axisEnab(left)={0.52,1}
		Label/w=evolve left "pvector";DelayUpdate
		Label/w=evolve top "parameter"
		 ModifyGraph/w=evolve lblPos(left)=10


      // chi2 trend on its own pair of free axes, in the lower part of the window
      AppendToGraph/w=evolve /L=chi2/B=generation gen.chi2trend
      label/w=evolve chi2, "Chi2"
     ModifyGraph/w=evolve axisEnab(chi2)={0.05,0.48} 
      ModifyGraph/w=evolve axisEnab(generation)={0.1,1} 
      Label/w=evolve generation "generation"
      ModifyGraph/w=evolve lblPos(generation)=10
       ModifyGraph/w=evolve lblPos(chi2)=0
      ModifyGraph/w=evolve log(chi2)=2, minor(chi2)=1  //log 2

// optionally we might add a second curve to show how many improvements/generation were found:      
//       AppendToGraph/w=evolve /R=imp/B=generation  gen.nbestvsgen
//       ModifyGraph rgb($nameofwave(gen.nbestvsgen))=(2,39321,1)
//      label imp, "improvements"
//      //label generation "generation"
//     ModifyGraph/w=evolve axisEnab(imp)={0.05,0.48} 
      
		// force the new window to be drawn now
		Doupdate


end

//************************************


//******    OpenCL functions: can only be defined if IgorCL XOP present
#ifdef IGOR_CL     



// Flag values describing how each kernel argument is used. In this file
// executeCL_maketrials stores IgorCLReadOnly / IgorCLWriteOnly, one entry per
// kernel argument, in the wave it hands to the IgorCL operation through /MFLG.
// NOTE(review): the numeric values presumably have to match the flag
// definitions of the IgorCL XOP - confirm against its documentation.
static constant kIgorCLUseHostPointer = 8
static constant IgorCLReadWrite = 1
static constant IgorCLWriteOnly = 2
static constant IgorCLReadOnly = 4
static constant IgorCLIsLocalMemory = 16
static constant IgorCLIsScalarArgument = 32





//----------
// Compiles the OpenCL source returned by CLcode(gen) with IgorCLCompile.
//
// The target is decoded from gen.pldev: bits 0-1 give the device number and
// bits 2-3 the platform number.
// On success the compiled binary, written to the wave GEN_CompiledBinary in
// the GEN folder, is bound to gen.GEN_CompiledBinary.
// On failure gen.pldev is set to -1 (the fallback described in the message:
// trial vectors are then generated by Igor code).
//
// All history output is suppressed when gen.quiet is set. This now includes
// the compiler build log, which used to be printed unconditionally; an empty
// build log is no longer printed either.
static Function compileCL(gen)
	Struct GEN_optimisation &gen

	string nbin = fullname("GEN_CompiledBinary")

	variable dev = isbitset(gen.pldev, 0) + 2*isbitset(gen.pldev, 1)
	variable pltf = isbitset(gen.pldev, 2) + 2*isbitset(gen.pldev, 3)

	if(!gen.quiet)
		print "compiling OpenCL code, platform ",pltf,", device ", dev
	endif

	IgorCLCompile/Z /PLTM=(pltf) /DEV=(dev) /DEST=$nbin CLcode(gen)

	// take the status right after the operation, before anything else runs
	variable clerr = v_flag

	if(!gen.quiet && strlen(s_buildlog) > 0)
		print s_buildlog
	endif

	if (clerr == 0)
		// bind the created wave to the corresponding wave-reference field in gen
		wave gen.GEN_CompiledBinary = $nbin
	else
		gen.pldev = -1	// fallback

		if(!gen.quiet)
			string errmsg
			if (clerr == -1)
				errmsg = "OpenCL error (CL_DEVICE_NOT_FOUND), using Igor code to generate trial vectors"
			else
				errmsg = "OpenCL error (code "+num2str(clerr)+"), using Igor code to generate trial vectors"
			endif
			print errmsg
		endif
	endif

end

//******


// Runs the OpenCL kernel "make_trials" (source in CLcode) to fill
// popmatrix_trial with one trial vector per population vector.
//
// Kernel arguments, in the order they are passed to IgorCL:
//   params          [0] k_m, [1] recombination, [2] bfrac
//   limits          single-precision copy of gen.limits (veclen x 2)
//   chi2idx         indices of the population vectors (read by the kernel)
//   popmatrix       current population (read by the kernel)
//   popmatrix_trial output: trial vectors (written by the kernel)
//   idxwave         32-bit integer copy of gen.nhidxwave
//   seeds           random seeds for the kernel's generator
//   sizes           [0] veclen, [1] nvec, [2] nseeds, [3] nhsize
// NOTE(review): the kernel declares chi2idx, popmatrix and popmatrix_trial as
// float*, so the caller presumably passes single-precision waves - confirm.
//
// The platform/device are decoded from gen.pldev as in compileCL
// (bits 0-1: device, bits 2-3: platform).
static function executeCL_maketrials(gen, chi2idx, popmatrix, popmatrix_trial)
	Struct GEN_optimisation &gen
	wave chi2idx, popmatrix, popmatrix_trial

	variable veclen = dimsize(popmatrix_trial, 0)
	variable nvec = dimsize(popmatrix_trial, 1)
	variable nhsize = numpnts(gen.nhidxwave)

	// globalsize(0) is the number of threads used: best to have 1 thread per
	// vector if possible, but limited to gen.maxCLthreads for better performance
	variable gsze_0 = min(nvec, gen.maxCLthreads)

	// optimisation constants
	make/free/N=3 params
	params[0] = gen.k_m
	params[1] = gen.recombination
	params[2] = gen.bfrac

	// The kernel initialises each thread's generator from
	// seeds[id % nseeds] and seeds[(id+1) % nseeds]. With fewer seeds than
	// threads, threads id and id+nseeds would start from the same state and
	// build their trial vectors from the same random sequence, so at least
	// one seed per thread is generated (never fewer than the former 50).
	variable nseeds = max(50, ceil(gsze_0))
	make/free/I/N=(nseeds) seeds
	seeds = round(enoise(1e9))

	make/I/free/N=4 sizes
	sizes[0] = veclen
	sizes[1] = nvec
	sizes[2] = nseeds
	sizes[3] = nhsize

	// free single-precision copy of the limits, in case the original wave is double
	make/free/N=(veclen,2) limits
	limits = gen.limits

	// free copy of the non-held index wave, to make sure it is int32
	make/free/I/N=(nhsize) idxwave
	idxwave = gen.nhidxwave

	// every argument is read-only (a bit faster) except popmatrix_trial (#4)
	Make/FREE/I/N=8 W_MemFlags
	W_MemFlags = IgorCLReadOnly
	W_MemFlags[4] = IgorCLWriteOnly

	variable dev = isbitset(gen.pldev, 0) + 2*isbitset(gen.pldev, 1)
	variable pltf = isbitset(gen.pldev, 2) + 2*isbitset(gen.pldev, 3)

	IgorCL /PLTM=(pltf) /DEV=(dev) /SRCB=gen.GEN_CompiledBinary /GSZE={gsze_0, 1,1} /KERN="make_trials" /MFLG=W_MemFlags params, limits, chi2idx, popmatrix, popmatrix_trial, idxwave, seeds, sizes

end

//**  function defining CL code 
//converted from notebook text by  CopyCodeToIgorString in IgorCLProcedures.ipf (see IgorCL distribution)

// Returns, as one string, the OpenCL C source that compileCL hands to
// IgorCLCompile. The source defines:
//   MAX_VECLEN        size of the per-thread work arrays, set to numpnts(gen.coefs)
//   MWC64X            uniform random number generator (32-bit output)
//   enoise1           random float built from MWC64X
//   inoisen           random integer index below a given range
//   set_trialvector   recombination step, applied to the non-held positions only
//   ensureconstraints re-draws any non-held component that left its limits
//   make_trials       the kernel: builds one trial vector per population vector
// gen is only used to read the number of coefficients.
static function/s CLcode(gen)
	Struct GEN_optimisation &gen
	
	
variable veclen=round(numpnts(gen.coefs))	

string igorCLCode = ""
// the vector length is baked into the source, so the code has to be
// recompiled when the number of coefficients changes
igorCLCode += "#define MAX_VECLEN "+num2str(veclen)+"\n"
igorCLCode += "\n"
//MWC64X : uniform random number generator from https://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html
igorCLCode += "inline uint MWC64X(uint2 *state)\n"
igorCLCode += "{\n"
igorCLCode += "    enum { A=4294883355U};\n"
igorCLCode += "    uint x=(*state).x, c=(*state).y;  // Unpack the state\n"
igorCLCode += "    uint res=x^c;                     // Calculate the result\n"
igorCLCode += "    uint hi=mul_hi(x,A);              // Step the RNG\n"
igorCLCode += "    x=x*A+c;\n"
igorCLCode += "    c=hi+(x<c);\n"
igorCLCode += "    *state=(uint2)(x,c);               // Pack the state back up\n"
igorCLCode += "    return res;                       // Return the next result\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "//******************\n"
// NOTE(review): the generator output is divided by 0xffffffff, so the result
// can apparently reach both 0 and 1; inoisen below subtracts 0.00001 before
// truncating so that the index stays below range.
igorCLCode += "inline float enoise1(uint2 *pstate)  // in (0,1)\n"
igorCLCode += "{\n"
igorCLCode += "\n"
igorCLCode += "return (float) MWC64X(pstate)/(float) 0xffffffffU;\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "\n"
igorCLCode += "//**********evenly distributed integers in range [0,n-1]\n"
igorCLCode += "\n"
igorCLCode += "inline int inoisen(int range, uint2 *pstate)  // in [0,n-1]\n"
igorCLCode += "{\n"
igorCLCode += "\n"
igorCLCode += "return (int) (range*(enoise1(pstate)-0.00001f)) ;  //make sure it is less\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "//***************\n"
// generator was tested with this function:
//igorCLCode += "kernel void testrand(global float* outputMatrix, global int* nPoints)\n"
//igorCLCode += "\n"
//igorCLCode += "{\n"
//igorCLCode += "	int globalID = get_global_id(0);\n"
//igorCLCode += "\n"
//igorCLCode += "\n"
//igorCLCode += "	if(globalID==0) \n"
//igorCLCode += "	{\n"
//igorCLCode += "	\n"
//igorCLCode += "	uint2 seed;\n"
//igorCLCode += "	seed.x=51;\n"
//igorCLCode += "	seed.y=21;\n"
//igorCLCode += "	\n"
//igorCLCode += "	for(int i=0; i<nPoints[0]; i++) \n"
//igorCLCode += "	 {\n"
//igorCLCode += "	   float rnd=inoisen(5, &seed);\n"
//igorCLCode += "	   outputMatrix[i]=rnd;\n"
//igorCLCode += "	 }\n"
//igorCLCode += "	}\n"
//igorCLCode += "\n"
//igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "//***********************************\n"
// recombination: each non-held position (listed in nhidx) takes the value of
// bprime_vector when a random draw is <= recomb, or when it is the one
// randomly chosen position R; other positions of trial_vector are left as is
igorCLCode += "inline void set_trialvector(uint2 *pstate, float recomb, float *p_vector, float *bprime_vector, float *trial_vector, global int *nhidx, int nhsize) \n"
igorCLCode += "{  //passing one seed here\n"
igorCLCode += "\n"
igorCLCode += "	//random index:\n"
igorCLCode += "	int R=inoisen(nhsize, pstate);\n"
igorCLCode += "\n"
igorCLCode += "	int ii, id; float ri; \n"
igorCLCode += "\n"
igorCLCode += "	for(int id=0;id<nhsize;id++)	\n"
igorCLCode += "	{  "
igorCLCode += "		ri = enoise1(pstate);\n"
igorCLCode += "		if(ri<=recomb || R==id)\n"
igorCLCode += "       { ii=nhidx[id]; "
igorCLCode += "			trial_vector[ii]=bprime_vector[ii];}\n"
igorCLCode += "		\n"
igorCLCode += " }\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "//**************************\n"
// limits is indexed as two consecutive columns of veclen values:
// limits[ii] is the lower and limits[ii+veclen] the upper bound
igorCLCode += "void ensureconstraints(uint2 *pstate, float *trial_vector, global float *limits, int veclen, global int *nhidx, int nhsize)  //add global to limits\n"
igorCLCode += "{\n"
igorCLCode += "\n"
igorCLCode += "	for(int id=0;id<nhsize;id++)\n"
igorCLCode += "	{  int ii=nhidx[id];"
igorCLCode += "		float lowerbound=limits[ii];\n"
igorCLCode += "		float upperbound=limits[ii+veclen];  //2nd column\n"
igorCLCode += "		if(trial_vector[ii]<lowerbound || trial_vector[ii]>upperbound)	//are we in the limits?\n"
igorCLCode += "			trial_vector[ii]=lowerbound+enoise1(pstate)*(upperbound-lowerbound);		//this should ensure that the parameter is in limits!!!\n"
igorCLCode += "		\n"
igorCLCode += "		\n"
igorCLCode += "	}\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += "\n"
igorCLCode += "//******************************************\n"
igorCLCode += "kernel void make_trials(global float *params, global float *limits, global float *chi2idx, global float *popmatrix,\n"
igorCLCode += "                                 global float *popmatrix_trial, global int *nhidx, global int *seeds, global int *sizes)\n"
//params[0] k_m
//params[1] recomb
//params[2]  bfrac
//sizes[0]  veclen
//sizes[1]  nvec
//sizes[2]  nseeds
//sizes[3]  nhsize

igorCLCode += "\n"
igorCLCode += "{\n"
igorCLCode += "\n"
igorCLCode += "if(sizes[0]>MAX_VECLEN)  //signal problem putting nan, don't continue to avoid crash (shouldn't happen if define put in software)\n"
igorCLCode += " { \n"
igorCLCode += "  popmatrix_trial[0]=NAN;\n"
igorCLCode += "  return;\n"
igorCLCode += "}\n"
igorCLCode += "\n"
igorCLCode += " int globalID = get_global_id(0);        \n"
igorCLCode += " int globalSize = get_global_size(0);\n"
igorCLCode += "\n"
igorCLCode += " int veclen=sizes[0];\n"
igorCLCode += " int nvec=sizes[1];\n"
igorCLCode += " int nseeds=sizes[2];\n"
igorCLCode += " int nhsize=sizes[3];\n"
igorCLCode += "\n"
igorCLCode += " float k_m=params[0];\n"
igorCLCode += " float recomb = params[1];\n"
igorCLCode += " float bfrac =  params[2];\n"
igorCLCode += "\n"
// NOTE(review): the seeds are picked modulo nseeds, so threads whose IDs
// differ by a multiple of nseeds start from the same generator state
igorCLCode += " //set up a seed for this thread\n"
igorCLCode += "  uint2 state; \n"
igorCLCode += "  state.x=seeds[globalID % nseeds];\n"
igorCLCode += "  state.y=seeds[(globalID+1) % nseeds];  //take next seed\n"
igorCLCode += "\n"
igorCLCode += "  //local arrays\n"
igorCLCode += "  float p_vector[MAX_VECLEN];\n"
igorCLCode += "  float br_vector[MAX_VECLEN];\n"       //random "br" vector among best vectors
igorCLCode += "  float bprime_vector[MAX_VECLEN];\n"
igorCLCode += "  float trial_vector[MAX_VECLEN];\n"
igorCLCode += "  \n"
igorCLCode += "\n"
// each thread handles vectors globalID, globalID+globalSize, ...
igorCLCode += " for(int vii=globalID; vii<nvec; vii+=globalSize)  //vii is vector number, loop if nvec>globalsize \n"
igorCLCode += "  {\n"
igorCLCode += "	\n"
igorCLCode += "	  state.x += 1; //to have different values in loop\n"
igorCLCode += "	  state.y += 1;\n"
igorCLCode += "	  \n"
igorCLCode += "	  int rnd_a=inoisen(nvec, &state);\n"
igorCLCode += "	  int rnd_b=inoisen(nvec, &state);\n"
igorCLCode += "	  int rnd_bfrac=min(inoisen((int)(nvec*bfrac), &state), nvec-1);\n"  //random index for chi2idx
igorCLCode += "	  int rnd_best=chi2idx[rnd_bfrac]  ;\n"  //index of random bvector among best
igorCLCode += "	  \n"
// bprime = br + k_m*(vector a - vector b); popmatrix is addressed as
// popmatrix[vector*veclen + component]
igorCLCode += "	  for (int iv=0; iv<veclen; iv++)\n"
igorCLCode += "	               {br_vector[iv]=popmatrix[rnd_best*veclen+iv];\n"
igorCLCode += "	               bprime_vector[iv]=br_vector[iv]+k_m*(popmatrix[rnd_a*veclen+iv]-popmatrix[rnd_b*veclen+iv]); }\n"
igorCLCode += "	               \n"
igorCLCode += "	  for (int iv=0; iv<veclen; iv+=1)\n"
igorCLCode += "	     {trial_vector[iv]=p_vector[iv] =  popmatrix[vii*veclen + iv]; }\n"  //copy to trial too for held pos
igorCLCode += "	     \n"
igorCLCode += "//only non hold pos set: inline void set_trialvector(uint2 *pstate, float recomb, float *p_vector, float *bprime_vector, float *trial_vector, global int *nhidx, int nhsize) \n"
igorCLCode += "     set_trialvector(&state, recomb, p_vector, bprime_vector, trial_vector, nhidx, nhsize); \n"
igorCLCode += "     \n"
igorCLCode += "//void ensureconstraints(uint2 *pstate, float *trial_vector, global float *limits, int veclen, global int *nhidx, int nhsize)"
igorCLCode += "\n"
igorCLCode += "    ensureconstraints(&state, trial_vector, limits, veclen, nhidx, nhsize);\n"
igorCLCode += "\n"
igorCLCode += "		  for (int iv=0; iv<veclen; iv+=1)\n"
igorCLCode += "		  {\n"
igorCLCode += "			popmatrix_trial[vii*veclen + iv]=trial_vector[iv];\n"
igorCLCode += "		  }\n"
igorCLCode += "     \n"
igorCLCode += "	\n"
igorCLCode += "  }\n"
igorCLCode += "}"


return igorCLCode


end

#endif   //CL functions




