Logo ROOT   6.12/07
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticAlgorithm *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 /*! \class TMVA::GeneticAlgorithm
26 \ingroup TMVA
27 
28 Base definition for genetic algorithm
29 
30 */
31 
32 #include <iostream>
33 #include <algorithm>
34 #include <float.h>
35 
36 #ifdef _GLIBCXX_PARALLEL
37 #include <omp.h>
38 #endif
39 
40 #include "TMVA/GeneticAlgorithm.h"
41 #include "TMVA/Interval.h"
42 #include "TMVA/IFitterTarget.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/Types.h"
45 
46 #include "RtypesCore.h"
47 #include "Rtypes.h"
48 #include "TMath.h"
49 
50 namespace TMVA {
51  const Bool_t GeneticAlgorithm__DEBUG__ = kFALSE;
52 }
53 
55 
56 ////////////////////////////////////////////////////////////////////////////////
57 /// Constructor
58 ///
59 /// Parameters:
60 ///
61 /// - int populationSize : defines the number of "Individuals" which are created and tested
62 /// within one Generation (Iteration of the Evolution)
63 /// - std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
64 /// gets the low and GetMax gets the high constraint of the variable
65 /// the size of "ranges" is the number of coefficients which are optimised
66 /// Purpose:
67 ///
68 /// Creates a random population with individuals of the size ranges.size()
69 
71  const std::vector<Interval*>& ranges, UInt_t seed )
72 : fConvCounter(-1),
73  fFitterTarget( target ),
74  fConvValue(0.),
75  fLastResult(DBL_MAX),
76  fSpread(0.1),
77  fMirror(kTRUE),
78  fFirstTime(kTRUE),
79  fMakeCopies(kFALSE),
80  fPopulationSize(populationSize),
81  fRanges( ranges ),
82  fPopulation(ranges, populationSize, seed),
83  fBestFitness(DBL_MAX),
84  fLogger( new MsgLogger("GeneticAlgorithm") )
85 {
86  fPopulation.SetRandomSeed( seed );
87 }
88 
90 {
91  // destructor; deletes fLogger
92  delete fLogger;
93 }
94 
95 
96 ////////////////////////////////////////////////////////////////////////////////
97 /// calls evolution, but if it is not the first time.
98 /// If it's the first time, the random population created by the
99 /// constructor is still not evaluated, .. therefore we wait for the
100 /// second time init is called.
101 
103 {
104  if ( fFirstTime ) fFirstTime = kFALSE;
105  else {
106  Evolution();
107  }
108 }
109 
110 ////////////////////////////////////////////////////////////////////////////////
111 /// if the "fitnessFunction" is called multiple times for one set of
112 /// factors (because i.e. each event of a TTree has to be assessed with
113 /// each set of Factors proposed by the Genetic Algorithm) the value
114 /// of the current calculation has to be added(? or else) to the value
115 /// obtained up to now.
116 /// example: some chi-square is calculated for every event,
117 /// after every event the new chi-square (newValue) has to be simply
118 /// added to the oldValue.
119 ///
120 /// this function has to be overridden eventually
121 /// it might contain only the following return statement.
122 /// return oldValue + newValue;
123 
125 {
126  return newValue;
127 }
128 
129 ////////////////////////////////////////////////////////////////////////////////
130 /// starts the evaluation of the fitness of all different individuals of
131 /// the population.
132 ///
133 /// this function calls implicitly (many times) the "fitnessFunction" which
134 /// has been overridden by the user.
135 
137 {
138  fBestFitness = DBL_MAX;
139 #ifdef _GLIBCXX_PARALLEL
140 
141  const int nt = omp_get_num_threads();
142  Double_t bests[nt];
143  for ( int i =0; i < nt; ++i )
144  bests[i] = fBestFitness;
145 
146 #pragma omp parallel
147  {
148  int thread_number = omp_get_thread_num();
149 #pragma omp for
150  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
151  {
152  GeneticGenes* genes = fPopulation.GetGenes(index);
153  Double_t fitness = NewFitness( genes->GetFitness(),
155  genes->SetFitness( fitness );
156 
157  if ( bests[thread_number] > fitness )
158  bests[thread_number] = fitness;
159  }
160  }
161 
162  fBestFitness = *std::min_element(bests, bests+nt);
163 
164 #else
165 
166  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
167  GeneticGenes* genes = fPopulation.GetGenes(index);
168  Double_t fitness = NewFitness( genes->GetFitness(),
170  genes->SetFitness( fitness );
171 
172  if ( fBestFitness > fitness )
173  fBestFitness = fitness;
174 
175  }
176 
177 #endif
178 
179  fPopulation.Sort();
180 
181  return fBestFitness;
182 }
183 
184 ////////////////////////////////////////////////////////////////////////////////
185 /// this function is called from "init" and controls the evolution of the
186 /// individuals.
187 ///
188 /// The function can be overridden to change the parameters for mutation rate
189 /// sexual reproduction and so on.
190 
192 {
193  if ( fMakeCopies )
194  fPopulation.MakeCopies( 5 );
196 
197  fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
199 }
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// this function provides the ability to change the stepSize of a mutation according to
203 /// the success of the last generations.
204 ///
205 /// Parameters:
206 ///
207 /// - int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
208 /// - int successSteps : >sucessSteps Generations could improve the result
209 /// - double factor : than multiply the stepSize ( spread ) by this factor
210 ///
211 /// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
212 /// is divided by the factor)
213 ///
214 /// using this function one can increase the stepSize of the mutation when we have
215 /// good success (to pass fast through the easy phase-space) and reduce the stepSize
216 /// if we are in a difficult "territory" of the phase-space.
217 
219 {
220  // < is valid for "less" comparison
221  if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
223  fSuccessList.push_front( 1 ); // it got better
224  }
225  else {
226  fSuccessList.push_front( 0 ); // it stayed the same
227  }
228  Int_t n = 0;
229  Int_t sum = 0;
230  std::deque<Int_t>::iterator vec = fSuccessList.begin();
231  for (; vec != fSuccessList.end() ; vec++) {
232  sum += *vec;
233  n++;
234  }
235 
236  if ( n >= ofSteps ) {
237  fSuccessList.pop_back();
238  if ( sum > successSteps ) { // too much success
239  fSpread /= factor;
240  if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
241  }
242  else if ( sum == successSteps ) { // on the optimal path
243  if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
244  }
245  else { // not very successful
246  fSpread *= factor;
247  if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
248  }
249  }
250 
251  return fSpread;
252 }
253 
254 ////////////////////////////////////////////////////////////////////////////////
255 /// gives back true if the last "steps" steps have lead to an improvement of the
256 /// "fitness" of the "individuals" of at least "improvement"
257 ///
258 /// this gives a simple measure of if the fitness of the individuals is
259 /// converging and no major improvement is to be expected soon.
260 
262 {
263  if (fConvCounter < 0) {
265  }
266  if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
267  fConvCounter ++;
268  }
269  else {
270  fConvCounter = 0;
272  }
273  if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
274  if (fConvCounter < steps) return kFALSE;
275  return kTRUE;
276 }
static long int sum(long int i)
Definition: Factory.cxx:2173
virtual Double_t EstimatorFunction(std::vector< Double_t > &parameters)=0
Int_t GetPopulationSize() const
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
GeneticAlgorithm(IFitterTarget &target, Int_t populationSize, const std::vector< TMVA::Interval *> &ranges, UInt_t seed=0)
Constructor.
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
void MakeChildren()
Creates children out of members of the current generation.
void MakeCopies(int number)
Produces offspring which is are copies of their parents.
GeneticPopulation fPopulation
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
void Mutate(Double_t probability=20, Int_t startIndex=0, Bool_t near=kFALSE, Double_t spread=0.1, Bool_t mirror=kFALSE)
Mutates the individuals in the genePool.
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:51
IFitterTarget & fFitterTarget
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
unsigned int UInt_t
Definition: RtypesCore.h:42
Base definition for genetic algorithm.
GeneticGenes * GetGenes(Int_t index)
gives back the "Genes" of the population with the given index.
Double_t GetFitness() const
Definition: GeneticGenes.h:52
const Bool_t kFALSE
Definition: RtypesCore.h:88
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
void Sort()
sort the genepool according to the fitness of the individuals
MsgLogger & Log() const
std::deque< Int_t > fSuccessList
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
Abstract ClassifierFactory template that handles arbitrary types.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i...
Interface for a fitter &#39;target&#39;.
Definition: IFitterTarget.h:44
const Bool_t kTRUE
Definition: RtypesCore.h:87
void Init()
calls evolution, but if it is not the first time.
const Int_t n
Definition: legend1.C:16