Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
GeneticAlgorithm.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TMVA::GeneticAlgorithm *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * *
20 * Redistribution and use in source and binary forms, with or without *
21 * modification, are permitted according to the terms listed in LICENSE *
22 * (see tmva/doc/LICENSE) *
23 **********************************************************************************/
24
25/*! \class TMVA::GeneticAlgorithm
26\ingroup TMVA
27
28Base definition for genetic algorithm
29
30*/
31
32#include <algorithm>
33#include <cfloat>
34
35#ifdef _GLIBCXX_PARALLEL
36#include <omp.h>
37#endif
38
40#include "TMVA/Interval.h"
41#include "TMVA/IFitterTarget.h"
42#include "TMVA/MsgLogger.h"
43#include "TMVA/Types.h"
44
45#include "RtypesCore.h"
46#include "Rtypes.h"
47#include "TMath.h"
48
49namespace TMVA {
51}
52
53
54////////////////////////////////////////////////////////////////////////////////
55/// Constructor
56///
57/// Parameters:
58///
59/// - int populationSize : defines the number of "Individuals" which are created and tested
60/// within one Generation (Iteration of the Evolution)
61/// - std::vector<TMVA::Interval*> ranges : Interval holds the information of an interval, where the GetMin
62/// gets the low and GetMax gets the high constraint of the variable
63/// the size of "ranges" is the number of coefficients which are optimised
64/// Purpose:
65///
66/// Creates a random population with individuals of the size ranges.size()
67
69 const std::vector<Interval*>& ranges, UInt_t seed )
70: fConvCounter(-1),
71 fFitterTarget( target ),
72 fConvValue(0.),
73 fLastResult(DBL_MAX),
74 fSpread(0.1),
78 fPopulationSize(populationSize),
79 fRanges( ranges ),
80 fPopulation(ranges, populationSize, seed),
81 fBestFitness(DBL_MAX),
82 fLogger( new MsgLogger("GeneticAlgorithm") )
83{
84 fPopulation.SetRandomSeed( seed );
85}
86
88{
89 // destructor; deletes fLogger
90 delete fLogger;
91}
92
93
94////////////////////////////////////////////////////////////////////////////////
95/// calls evolution, but if it is not the first time.
96/// If it's the first time, the random population created by the
97/// constructor is still not evaluated, .. therefore we wait for the
98/// second time init is called.
99
101{
103 else {
104 Evolution();
105 }
106}
107
108////////////////////////////////////////////////////////////////////////////////
109/// if the "fitnessFunction" is called multiple times for one set of
110/// factors (because i.e. each event of a TTree has to be assessed with
111/// each set of Factors proposed by the Genetic Algorithm) the value
112/// of the current calculation has to be added(? or else) to the value
113/// obtained up to now.
114/// example: some chi-square is calculated for every event,
115/// after every event the new chi-square (newValue) has to be simply
116/// added to the oldValue.
117///
118/// this function has to be overridden eventually
119/// it might contain only the following return statement.
120/// return oldValue + newValue;
121
123{
124 return newValue;
125}
126
127////////////////////////////////////////////////////////////////////////////////
128/// starts the evaluation of the fitness of all different individuals of
129/// the population.
130///
131/// this function calls implicitly (many times) the "fitnessFunction" which
132/// has been overridden by the user.
133
135{
136 fBestFitness = DBL_MAX;
137#ifdef _GLIBCXX_PARALLEL
138
139 const int nt = omp_get_num_threads();
140 Double_t bests[nt];
141 for ( int i =0; i < nt; ++i )
142 bests[i] = fBestFitness;
143
144#pragma omp parallel
145 {
146 int thread_number = omp_get_thread_num();
147#pragma omp for
148 for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index )
149 {
150 GeneticGenes* genes = fPopulation.GetGenes(index);
151 Double_t fitness = NewFitness( genes->GetFitness(),
152 fFitterTarget.EstimatorFunction(genes->GetFactors()) );
153 genes->SetFitness( fitness );
154
155 if ( bests[thread_number] > fitness )
156 bests[thread_number] = fitness;
157 }
158 }
159
160 fBestFitness = *std::min_element(bests, bests+nt);
161
162#else
163
164 for ( int index = 0; index < fPopulation.GetPopulationSize(); ++index ) {
165 GeneticGenes* genes = fPopulation.GetGenes(index);
166 Double_t fitness = NewFitness( genes->GetFitness(),
167 fFitterTarget.EstimatorFunction(genes->GetFactors()) );
168 genes->SetFitness( fitness );
169
170 if ( fBestFitness > fitness )
171 fBestFitness = fitness;
172
173 }
174
175#endif
176
177 fPopulation.Sort();
178
179 return fBestFitness;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183/// this function is called from "init" and controls the evolution of the
184/// individuals.
185///
186/// The function can be overridden to change the parameters for mutation rate
187/// sexual reproduction and so on.
188
190{
191 if ( fMakeCopies )
192 fPopulation.MakeCopies( 5 );
193 fPopulation.MakeChildren();
194
195 fPopulation.Mutate( 10, 3, kTRUE, fSpread, fMirror );
196 fPopulation.Mutate( 40, fPopulation.GetPopulationSize()*3/4 );
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// this function provides the ability to change the stepSize of a mutation according to
201/// the success of the last generations.
202///
203/// Parameters:
204///
205/// - int ofSteps : = if OF the number of STEPS given in this variable (ofSteps)
206/// - int successSteps : >sucessSteps Generations could improve the result
207/// - double factor : than multiply the stepSize ( spread ) by this factor
208///
209/// (if ofSteps == successSteps nothing is changed, if ofSteps < successSteps, the spread
210/// is divided by the factor)
211///
212/// using this function one can increase the stepSize of the mutation when we have
213/// good success (to pass fast through the easy phase-space) and reduce the stepSize
214/// if we are in a difficult "territory" of the phase-space.
215
217{
218 // < is valid for "less" comparison
219 if ( fBestFitness < fLastResult || fSuccessList.size() <=0 ) {
221 fSuccessList.push_front( 1 ); // it got better
222 }
223 else {
224 fSuccessList.push_front( 0 ); // it stayed the same
225 }
226 Int_t n = 0;
227 Int_t sum = 0;
228 std::deque<Int_t>::iterator vec = fSuccessList.begin();
229 for (; vec != fSuccessList.end() ; ++vec) {
230 sum += *vec;
231 n++;
232 }
233
234 if ( n >= ofSteps ) {
235 fSuccessList.pop_back();
236 if ( sum > successSteps ) { // too much success
237 fSpread /= factor;
238 if (GeneticAlgorithm__DEBUG__) Log() << kINFO << ">" << std::flush;
239 }
240 else if ( sum == successSteps ) { // on the optimal path
241 if (GeneticAlgorithm__DEBUG__) Log() << "=" << std::flush;
242 }
243 else { // not very successful
244 fSpread *= factor;
245 if (GeneticAlgorithm__DEBUG__) Log() << "<" << std::flush;
246 }
247 }
248
249 return fSpread;
250}
251
252////////////////////////////////////////////////////////////////////////////////
253/// gives back true if the last "steps" steps have lead to an improvement of the
254/// "fitness" of the "individuals" of at least "improvement"
255///
256/// this gives a simple measure of if the fitness of the individuals is
257/// converging and no major improvement is to be expected soon.
258
260{
261 if (fConvCounter < 0) {
263 }
264 if (TMath::Abs(fBestFitness - fConvValue) <= improvement || steps<0) {
265 fConvCounter ++;
266 }
267 else {
268 fConvCounter = 0;
270 }
271 if (GeneticAlgorithm__DEBUG__) Log() << "." << std::flush;
272 if (fConvCounter < steps) return kFALSE;
273 return kTRUE;
274}
Basic types used by ROOT and required by TInterpreter.
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
virtual Double_t SpreadControl(Int_t steps, Int_t ofSteps, Double_t factor)
this function provides the ability to change the stepSize of a mutation according to the success of t...
virtual Bool_t HasConverged(Int_t steps=10, Double_t ratio=0.1)
gives back true if the last "steps" steps have lead to an improvement of the "fitness" of the "indivi...
const std::vector< TMVA::Interval * > & fRanges
GeneticPopulation fPopulation
void Init()
calls evolution, but if it is not the first time.
std::deque< Int_t > fSuccessList
virtual void Evolution()
this function is called from "init" and controls the evolution of the individuals.
IFitterTarget & fFitterTarget
virtual Double_t CalculateFitness()
starts the evaluation of the fitness of all different individuals of the population.
GeneticAlgorithm(IFitterTarget &target, Int_t populationSize, const std::vector< TMVA::Interval * > &ranges, UInt_t seed=0)
Constructor.
virtual Double_t NewFitness(Double_t oldValue, Double_t newValue)
if the "fitnessFunction" is called multiple times for one set of factors (because i....
MsgLogger * fLogger
! message logger
MsgLogger & Log() const
Cut optimisation interface class for genetic algorithm.
void SetFitness(Double_t fitness)
std::vector< Double_t > & GetFactors()
Double_t GetFitness() const
Interface for a fitter 'target'.
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
const Bool_t GeneticAlgorithm__DEBUG__
const Int_t n
Definition legend1.C:16
create variable transformations
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:122
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2338