ROOT  6.06/09
Reference Guide
GeneticAlgorithm.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticAlgorithm *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 //_______________________________________________________________________
26 //
27 // Base definition for genetic algorithm
28 //_______________________________________________________________________
29 
30 #include <iostream>
31 #include <algorithm>
32 #include <float.h>
33 
34 #ifdef _GLIBCXX_PARALLEL
35 #include <omp.h>
36 #endif
37 
38 #include "TMVA/GeneticAlgorithm.h"
39 #include "TMVA/Interval.h"
40 #include "TMVA/IFitterTarget.h"
41 
42 #include "TMVA/MsgLogger.h"
43 
44 namespace TMVA {
46 }
47 
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// Constructor
52 /// Parameters:
53 /// int populationSize : defines the number of "Individuals" which are created and tested
54 /// within one Generation (Iteration of the Evolution)
55 /// std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
56 /// gets the low and GetMax gets the high constraint of the variable
57 /// the size of "ranges" is the number of coefficients which are optimised
58 /// Purpose:
59 /// Creates a random population with individuals of the size ranges.size()
60 
61 TMVA::GeneticAlgorithm::GeneticAlgorithm( IFitterTarget& target, Int_t populationSize,
62  const std::vector<Interval*>& ranges, UInt_t seed )
63  : fConvCounter(-1),
64  fFitterTarget( target ),
65  fConvValue(0.),
66  fLastResult(DBL_MAX),
67  fSpread(0.1),
68  fMirror(kTRUE),
69  fFirstTime(kTRUE),
70  fMakeCopies(kFALSE),
71  fPopulationSize(populationSize),
72  fRanges( ranges ),
73  fPopulation(ranges, populationSize, seed),
74  fBestFitness(DBL_MAX),
75  fLogger( new MsgLogger("GeneticAlgorithm") )
76 {
77  fPopulation.SetRandomSeed( seed );
78 }
79 
81 {
82  // destructor; deletes fLogger
83  delete fLogger;
84 }
85 
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// calls evolution, but if it is not the first time.
89 /// If it's the first time, the random population created by the
90 /// constructor is still not evaluated, .. therefore we wait for the
91 /// second time init is called.
92 
94 {
95  if ( fFirstTime ) fFirstTime = kFALSE;
96  else {
97  Evolution();
98  }
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// if the "fitnessFunction" is called multiple times for one set of
103 /// factors (because i.e. each event of a TTree has to be assessed with
104 /// each set of Factors proposed by the Genetic Algorithm) the value
105 /// of the current calculation has to be added(? or else) to the value
106 /// obtained up to now.
107 /// example: some chi-square is calculated for every event,
108 /// after every event the new chi-square (newValue) has to be simply
109 /// added to the oldValue.
110 ///
111 /// this function has to be overridden eventually
112 /// it might contain only the following return statement.
113 /// return oldValue + newValue;
114 
116 {
117  return newValue;
118 }
119 
120 ////////////////////////////////////////////////////////////////////////////////
121 /// starts the evaluation of the fitness of all different individuals of
122 /// the population.
123 ///
124 /// this function calls implicitly (many times) the "fitnessFunction" which
125 /// has been overridden by the user.
126 
128 {
129  fBestFitness = DBL_MAX;
130 #ifdef _GLIBCXX_PARALLEL
131 
132  const int nt = omp_get_num_threads();
133  Double_t bests[nt];
134  for ( int i =0; i < nt; ++i )
135  bests[i] = fBestFitness;
136 
137 #pragma omp parallel
138  {
139  int thread_number = omp_get_thread_num();
140 #pragma omp for
141  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
142  {
143  GeneticGenes* genes = fPopulation.GetGenes(index);
144  Double_t fitness = NewFitness( genes->GetFitness(),
145  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
146  genes->SetFitness( fitness );
147 
148  if ( bests[thread_number] > fitness )
149  bests[thread_number] = fitness;
150  }
151  }
152 
153  fBestFitness = *std::min_element(bests, bests+nt);
154 
155 #else
156 
157  for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
158  GeneticGenes* genes = fPopulation.GetGenes(index);
159  Double_t fitness = NewFitness( genes->GetFitness(),
160  fFitterTarget.EstimatorFunction(genes->GetFactors()) );
161  genes->SetFitness( fitness );
162 
163  if ( fBestFitness > fitness )
164  fBestFitness = fitness;
165 
166  }
167 
168 #endif
169 
170  fPopulation.Sort();
171 
172  return fBestFitness;
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// this function is called from "init" and controls the evolution of the
177 /// individuals.
178 /// the function can be overridden to change the parameters for mutation rate
179 /// sexual reproduction and so on.
180 
182 {
183  if ( fMakeCopies )
184  fPopulation.MakeCopies( 5 );
185  fPopulation.MakeChildren();
186 
187  fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
188  fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
189 }
190 
191 ////////////////////////////////////////////////////////////////////////////////
192 /// this function provides the ability to change the stepSize of a mutation according to
193 /// the success of the last generations.
194 ///
195 /// Parameters:
196 /// int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
197 /// int successSteps : >sucessSteps Generations could improve the result
198 /// double factor : than multiply the stepSize ( spread ) by this factor
199 /// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
200 /// is divided by the factor)
201 ///
202 /// using this function one can increase the stepSize of the mutation when we have
203 /// good success (to pass fast through the easy phase-space) and reduce the stepSize
204 /// if we are in a difficult "territory" of the phase-space.
205 ///
206 
208 {
209  // < is valid for "less" comparison
210  if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
211  fLastResult = fBestFitness;
212  fSuccessList.push_front( 1 ); // it got better
213  }
214  else {
215  fSuccessList.push_front( 0 ); // it stayed the same
216  }
217  Int_t n = 0;
218  Int_t sum = 0;
219  std::deque<Int_t>::iterator vec = fSuccessList.begin();
220  for (; vec != fSuccessList.end() ; vec++) {
221  sum += *vec;
222  n++;
223  }
224 
225  if ( n >= ofSteps ) {
226  fSuccessList.pop_back();
227  if ( sum > successSteps ) { // too much success
228  fSpread /= factor;
229  if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
230  }
231  else if ( sum == successSteps ) { // on the optimal path
232  if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
233  }
234  else { // not very successful
235  fSpread *= factor;
236  if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
237  }
238  }
239 
240  return fSpread;
241 }
242 
243 ////////////////////////////////////////////////////////////////////////////////
244 /// gives back true if the last "steps" steps have lead to an improvement of the
245 /// "fitness" of the "individuals" of at least "improvement"
246 ///
247 /// this gives a simple measure of if the fitness of the individuals is
248 /// converging and no major improvement is to be expected soon.
249 ///
250 
252 {
253  if (fConvCounter < 0) {
254  fConvValue = fBestFitness;
255  }
256  if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
257  fConvCounter ++;
258  }
259  else {
260  fConvCounter = 0;
261  fConvValue = fBestFitness;
262  }
263  if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
264  if (fConvCounter < steps) return kFALSE;
265  return kTRUE;
266 }
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
STL namespace.
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
ClassImp(TMVA::GeneticAlgorithm) TMVA
Constructor Parameters: int populationSize : defines the number of "Individuals" which are created an...
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:53
unsigned int UInt_t
Definition: RtypesCore.h:42
double Double_t
Definition: RtypesCore.h:55
Double_t GetFitness() const
Definition: GeneticGenes.h:54
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:51
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
Abstract ClassifierFactory template that handles arbitrary types.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i...
const Bool_t kTRUE
Definition: Rtypes.h:91
void Init()
calls evolution, but if it is not the first time.
const Int_t n
Definition: legend1.C:16
Definition: math.cpp:60
const Bool_t GeneticAlgorithm__DEBUG__