Logo ROOT   6.07/09
Reference Guide
GeneticPopulation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticPopulation *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 #include <iostream>
26 #include <iomanip>
27 
28 #include "Rstrstream.h"
29 #include "TSystem.h"
30 #include "TRandom3.h"
31 #include "TH1.h"
32 #include <algorithm>
33 
34 #include "TMVA/GeneticPopulation.h"
35 #include "TMVA/GeneticGenes.h"
36 #include "TMVA/MsgLogger.h"
37 
39 
40 using namespace std;
41 
42 //_______________________________________________________________________
43 //
44 // Population definition for genetic algorithm
45 //_______________________________________________________________________
46 
47 ////////////////////////////////////////////////////////////////////////////////
48 /// Constructor
49 
50 TMVA::GeneticPopulation::GeneticPopulation(const std::vector<Interval*>& ranges, Int_t size, UInt_t seed)
51  : fGenePool(size),
52  fRanges(ranges.size()),
53  fLogger( new MsgLogger("GeneticPopulation") )
54 {
55  // create a randomGenerator for this population and set a seed
56  // create the genePools
57  //
58  fRandomGenerator = new TRandom3( 100 ); //please check
59  fRandomGenerator->Uniform(0.,1.);
60  fRandomGenerator->SetSeed( seed );
61 
62  for ( unsigned int i = 0; i < ranges.size(); ++i )
63  fRanges[i] = new TMVA::GeneticRange( fRandomGenerator, ranges[i] );
64 
65  vector<Double_t> newEntry( fRanges.size() );
66  for ( int i = 0; i < size; ++i )
67  {
68  for ( unsigned int rIt = 0; rIt < fRanges.size(); ++rIt )
69  newEntry[rIt] = fRanges[rIt]->Random();
70  fGenePool[i] = TMVA::GeneticGenes( newEntry);
71  }
72 
73  fPopulationSizeLimit = size;
74 }
75 
76 ////////////////////////////////////////////////////////////////////////////////
77 /// destructor
78 
80 {
81  if (fRandomGenerator != NULL) delete fRandomGenerator;
82 
83  std::vector<GeneticRange*>::iterator it = fRanges.begin();
84  for (;it!=fRanges.end(); it++) delete *it;
85 
86  delete fLogger;
87 }
88 
89 
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// the random seed of the random generator
93 
95 {
96  fRandomGenerator->SetSeed( seed );
97 }
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// produces offspring which is are copies of their parents
101 /// Parameters:
102 /// int number : the number of the last individual to be copied
103 ///
104 
106 {
107  int i=0;
108  for (std::vector<TMVA::GeneticGenes>::iterator it = fGenePool.begin();
109  it != fGenePool.end() && i < number;
110  ++it, ++i ) {
111  GiveHint( it->GetFactors(), it->GetFitness() );
112  }
113 }
114 
115 ////////////////////////////////////////////////////////////////////////////////
116 /// does what the name says,... it creates children out of members of the
117 /// current generation
118 /// children have a combination of the coefficients of their parents
119 ///
120 
122 {
123 #ifdef _GLIBCXX_PARALLEL
124 #pragma omp parallel
125 #pragma omp for
126 #endif
127  for ( int it = 0; it < (int) (fGenePool.size() / 2); ++it )
128  {
129  Int_t pos = (Int_t)fRandomGenerator->Integer( fGenePool.size()/2 );
130  fGenePool[(fGenePool.size() / 2) + it] = MakeSex( fGenePool[it], fGenePool[pos] );
131  }
132 }
133 
134 ////////////////////////////////////////////////////////////////////////////////
135 /// this function takes two individuals and produces offspring by mixing (recombining) their
136 /// coefficients
137 ///
138 
140  TMVA::GeneticGenes female )
141 {
142  vector< Double_t > child(fRanges.size());
143  for (unsigned int i = 0; i < fRanges.size(); ++i) {
144  if (fRandomGenerator->Integer( 2 ) == 0) {
145  child[i] = male.GetFactors()[i];
146  }else{
147  child[i] = female.GetFactors()[i];
148  }
149  }
150  return TMVA::GeneticGenes( child );
151 }
152 
153 ////////////////////////////////////////////////////////////////////////////////
154 /// mutates the individuals in the genePool
155 /// Parameters:
156 /// double probability : gives the probability (in percent) of a mutation of a coefficient
157 /// int startIndex : leaves unchanged (without mutation) the individuals which are better ranked
158 /// than indicated by "startIndex". This means: if "startIndex==3", the first (and best)
159 /// three individuals are not mutaded. This allows to preserve the best result of the
160 /// current Generation for the next generation.
161 /// Bool_t near : if true, the mutation will produce a new coefficient which is "near" the old one
162 /// (gaussian around the current value)
163 /// double spread : if near==true, spread gives the sigma of the gaussian
164 /// Bool_t mirror : if the new value obtained would be outside of the given constraints
165 /// the value is mapped between the constraints again. This can be done either
166 /// by a kind of periodic boundary conditions or mirrored at the boundary.
167 /// (mirror = true seems more "natural")
168 ///
169 
170 void TMVA::GeneticPopulation::Mutate( Double_t probability , Int_t startIndex,
171  Bool_t near, Double_t spread, Bool_t mirror )
172 {
173  vector< Double_t>::iterator vec;
174  vector< TMVA::GeneticRange* >::iterator vecRange;
175 
176  //#ifdef _GLIBCXX_PARALLEL
177  // #pragma omp parallel
178  // #pragma omp for
179  //#endif
180  // The range methods are not thread safe!
181  for (int it = startIndex; it < (int) fGenePool.size(); ++it) {
182  vecRange = fRanges.begin();
183  for (vec = (fGenePool[it].GetFactors()).begin(); vec < (fGenePool[it].GetFactors()).end(); ++vec) {
184  if (fRandomGenerator->Uniform( 100 ) <= probability) {
185  (*vec) = (*vecRange)->Random( near, (*vec), spread, mirror );
186  }
187  ++vecRange;
188  }
189  }
190 }
191 
192 
193 ////////////////////////////////////////////////////////////////////////////////
194 /// gives back the "Genes" of the population with the given index.
195 ///
196 
198 {
199  return &(fGenePool[index]);
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// make a little printout of the individuals up to index "untilIndex"
204 /// this means, .. write out the best "untilIndex" individuals.
205 ///
206 
208 {
209  for ( unsigned int it = 0; it < fGenePool.size(); ++it )
210  {
211  Int_t n=0;
212  if (untilIndex >= -1 ) {
213  if (untilIndex == -1 ) return;
214  untilIndex--;
215  }
216  Log() << "fitness: " << fGenePool[it].GetFitness() << " ";
217  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
218  vec < fGenePool[it].GetFactors().end(); vec++ ) {
219  Log() << "f_" << n++ << ": " << (*vec) << " ";
220  }
221  Log() << Endl;
222  }
223 }
224 
225 ////////////////////////////////////////////////////////////////////////////////
226 /// make a little printout to the stream "out" of the individuals up to index "untilIndex"
227 /// this means, .. write out the best "untilIndex" individuals.
228 ///
229 
230 void TMVA::GeneticPopulation::Print( ostream & out, Int_t untilIndex )
231 {
232  for ( unsigned int it = 0; it < fGenePool.size(); ++it ) {
233  Int_t n=0;
234  if (untilIndex >= -1 ) {
235  if (untilIndex == -1 ) return;
236  untilIndex--;
237  }
238  out << "fitness: " << fGenePool[it].GetFitness() << " ";
239  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
240  vec < fGenePool[it].GetFactors().end(); vec++ ) {
241  out << "f_" << n++ << ": " << (*vec) << " ";
242  }
243  out << std::endl;
244  }
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// give back a histogram with the distribution of the coefficients
249 /// parameters:
250 /// int bins : number of bins of the histogram
251 /// int min : histogram minimum
252 /// int max : maximum value of the histogram
253 ///
254 
256  Int_t min, Int_t max )
257 {
258  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
259 
260  std::stringstream histName;
261  histName.clear();
262  histName.str("v");
263  histName << varNumber;
264  TH1F *hist = new TH1F( histName.str().c_str(),histName.str().c_str(), bins,min,max );
265 
266  return hist;
267 }
268 
269 ////////////////////////////////////////////////////////////////////////////////
270 /// gives back all the values of coefficient "varNumber" of the current generation
271 ///
272 
273 vector<Double_t> TMVA::GeneticPopulation::VariableDistribution( Int_t /*varNumber*/ )
274 {
275  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
276 
277  vector< Double_t > varDist;
278 
279  return varDist;
280 }
281 
282 ////////////////////////////////////////////////////////////////////////////////
283 /// add another population (strangers) to the one of this GeneticPopulation
284 
286 {
287  for (std::vector<TMVA::GeneticGenes>::iterator it = strangers->fGenePool.begin();
288  it != strangers->fGenePool.end(); it++ ) {
289  GiveHint( it->GetFactors(), it->GetFitness() );
290  }
291 }
292 
293 ////////////////////////////////////////////////////////////////////////////////
294 /// add another population (strangers) to the one of this GeneticPopulation
295 
297 {
298  AddPopulation(&strangers);
299 }
300 
301 ////////////////////////////////////////////////////////////////////////////////
302 /// trim the population to the predefined size
303 
305 {
306  std::sort(fGenePool.begin(), fGenePool.end());
307  while ( fGenePool.size() > (unsigned int) fPopulationSizeLimit )
308  fGenePool.pop_back();
309 }
310 
311 ////////////////////////////////////////////////////////////////////////////////
312 /// add an individual (a set of variables) to the population
313 /// if there is a set of variables which is known to perform good, they can be given as a hint to the population
314 
315 void TMVA::GeneticPopulation::GiveHint( std::vector< Double_t >& hint, Double_t fitness )
316 {
317  TMVA::GeneticGenes g(hint);
318  g.SetFitness(fitness);
319 
320  fGenePool.push_back( g );
321 }
322 
323 ////////////////////////////////////////////////////////////////////////////////
324 /// sort the genepool according to the fitness of the individuals
325 
327 {
328  std::sort(fGenePool.begin(), fGenePool.end());
329 }
330 
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void GiveHint(std::vector< Double_t > &hint, Double_t fitness=0)
add an individual (a set of variables) to the population if there is a set of variables which is know...
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:201
void MakeChildren()
does what the name says,...
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:575
void MakeCopies(int number)
produces offspring which are copies of their parents Parameters: int number : the number of the la...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
void AddPopulation(GeneticPopulation *strangers)
add another population (strangers) to the one of this GeneticPopulation
MsgLogger & Log() const
void Mutate(Double_t probability=20, Int_t startIndex=0, Bool_t near=kFALSE, Double_t spread=0.1, Bool_t mirror=kFALSE)
mutates the individuals in the genePool Parameters: double probability : gives the probability (in pe...
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
Definition: TRandom.cxx:320
std::vector< TMVA::GeneticRange * > fRanges
virtual ~GeneticPopulation()
destructor
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:53
void TrimPopulation()
trim the population to the predefined size
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
unsigned int UInt_t
Definition: RtypesCore.h:42
GeneticGenes * GetGenes(Int_t index)
gives back the "Genes" of the population with the given index.
void Print(Int_t untilIndex=-1)
make a little printout of the individuals up to index "untilIndex" this means, .
#define ClassImp(name)
Definition: Rtypes.h:279
std::vector< TMVA::GeneticGenes > fGenePool
double Double_t
Definition: RtypesCore.h:55
void Sort()
sort the genepool according to the fitness of the individuals
GeneticGenes MakeSex(GeneticGenes male, GeneticGenes female)
this function takes two individuals and produces offspring by mixing (recombining) their coefficients...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:51
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Definition: TRandom.cxx:606
Abstract ClassifierFactory template that handles arbitrary types.
#define NULL
Definition: Rtypes.h:82
TH1F * VariableDistribution(Int_t varNumber, Int_t bins, Int_t min, Int_t max)
give back a histogram with the distribution of the coefficients parameters: int bins : number of bins...
const Int_t n
Definition: legend1.C:16