Logo ROOT   6.12/07
Reference Guide
GeneticPopulation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticPopulation *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 /*! \class TMVA::GeneticPopulation
26 \ingroup TMVA
27 
28 Population definition for genetic algorithm.
29 
30 */
31 
32 #include <iostream>
33 #include <iomanip>
34 
35 #include "Rstrstream.h"
36 #include "TSystem.h"
37 #include "TRandom3.h"
38 #include "TH1.h"
39 #include <algorithm>
40 
41 #include "TMVA/GeneticPopulation.h"
42 #include "TMVA/GeneticGenes.h"
43 #include "TMVA/MsgLogger.h"
44 
46 
47 using namespace std;
48 
49 ////////////////////////////////////////////////////////////////////////////////
50 /// Constructor
51 
52 TMVA::GeneticPopulation::GeneticPopulation(const std::vector<Interval*>& ranges, Int_t size, UInt_t seed)
53  : fGenePool(size),
54  fRanges(ranges.size()),
55  fLogger( new MsgLogger("GeneticPopulation") )
56 {
57  // create a randomGenerator for this population and set a seed
58  // create the genePools
59  //
60  fRandomGenerator = new TRandom3( 100 ); //please check
61  fRandomGenerator->Uniform(0.,1.);
62  fRandomGenerator->SetSeed( seed );
63 
64  for ( unsigned int i = 0; i < ranges.size(); ++i )
65  fRanges[i] = new TMVA::GeneticRange( fRandomGenerator, ranges[i] );
66 
67  vector<Double_t> newEntry( fRanges.size() );
68  for ( int i = 0; i < size; ++i )
69  {
70  for ( unsigned int rIt = 0; rIt < fRanges.size(); ++rIt )
71  newEntry[rIt] = fRanges[rIt]->Random();
72  fGenePool[i] = TMVA::GeneticGenes( newEntry);
73  }
74 
75  fPopulationSizeLimit = size;
76 }
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// destructor
80 
82 {
83  if (fRandomGenerator != NULL) delete fRandomGenerator;
84 
85  std::vector<GeneticRange*>::iterator it = fRanges.begin();
86  for (;it!=fRanges.end(); it++) delete *it;
87 
88  delete fLogger;
89 }
90 
91 
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// the random seed of the random generator
95 
97 {
98  fRandomGenerator->SetSeed( seed );
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Produces offspring which is are copies of their parents.
103 ///
104 /// Parameters:
105 /// - int number : the number of the last individual to be copied
106 
108 {
109  int i=0;
110  for (std::vector<TMVA::GeneticGenes>::iterator it = fGenePool.begin();
111  it != fGenePool.end() && i < number;
112  ++it, ++i ) {
113  GiveHint( it->GetFactors(), it->GetFitness() );
114  }
115 }
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 /// Creates children out of members of the current generation.
119 ///
120 /// Children have a combination of the coefficients of their parents
121 
123 {
124 #ifdef _GLIBCXX_PARALLEL
125 #pragma omp parallel
126 #pragma omp for
127 #endif
128  for ( int it = 0; it < (int) (fGenePool.size() / 2); ++it )
129  {
130  Int_t pos = (Int_t)fRandomGenerator->Integer( fGenePool.size()/2 );
131  fGenePool[(fGenePool.size() / 2) + it] = MakeSex( fGenePool[it], fGenePool[pos] );
132  }
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
136 /// this function takes two individuals and produces offspring by mixing
137 /// (recombining) their coefficients.
138 
140  TMVA::GeneticGenes female )
141 {
142  vector< Double_t > child(fRanges.size());
143  for (unsigned int i = 0; i < fRanges.size(); ++i) {
144  if (fRandomGenerator->Integer( 2 ) == 0) {
145  child[i] = male.GetFactors()[i];
146  }else{
147  child[i] = female.GetFactors()[i];
148  }
149  }
150  return TMVA::GeneticGenes( child );
151 }
152 
153 ////////////////////////////////////////////////////////////////////////////////
154 /// Mutates the individuals in the genePool.
155 ///
156 /// Parameters:
157 ///
158 /// - double probability : gives the probability (in percent) of a mutation of a coefficient
159 /// - int startIndex : leaves unchanged (without mutation) the individuals which are better ranked
160 /// than indicated by "startIndex". This means: if "startIndex==3", the first (and best)
161 /// three individuals are not mutated. This allows to preserve the best result of the
162 /// current Generation for the next generation.
163 /// - Bool_t near : if true, the mutation will produce a new coefficient which is "near" the old one
164 /// (gaussian around the current value)
165 /// - double spread : if near==true, spread gives the sigma of the gaussian
166 /// - Bool_t mirror : if the new value obtained would be outside of the given constraints
167 /// the value is mapped between the constraints again. This can be done either
168 /// by a kind of periodic boundary conditions or mirrored at the boundary.
169 /// (mirror = true seems more "natural")
170 
171 void TMVA::GeneticPopulation::Mutate( Double_t probability , Int_t startIndex,
172  Bool_t near, Double_t spread, Bool_t mirror )
173 {
174  vector< Double_t>::iterator vec;
175  vector< TMVA::GeneticRange* >::iterator vecRange;
176 
177  //#ifdef _GLIBCXX_PARALLEL
178  // #pragma omp parallel
179  // #pragma omp for
180  //#endif
181  // The range methods are not thread safe!
182  for (int it = startIndex; it < (int) fGenePool.size(); ++it) {
183  vecRange = fRanges.begin();
184  for (vec = (fGenePool[it].GetFactors()).begin(); vec < (fGenePool[it].GetFactors()).end(); ++vec) {
185  if (fRandomGenerator->Uniform( 100 ) <= probability) {
186  (*vec) = (*vecRange)->Random( near, (*vec), spread, mirror );
187  }
188  ++vecRange;
189  }
190  }
191 }
192 
193 
194 ////////////////////////////////////////////////////////////////////////////////
195 /// gives back the "Genes" of the population with the given index.
196 
198 {
199  return &(fGenePool[index]);
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// make a little printout of the individuals up to index "untilIndex"
204 /// this means, .. write out the best "untilIndex" individuals.
205 
207 {
208  for ( unsigned int it = 0; it < fGenePool.size(); ++it )
209  {
210  Int_t n=0;
211  if (untilIndex >= -1 ) {
212  if (untilIndex == -1 ) return;
213  untilIndex--;
214  }
215  Log() << "fitness: " << fGenePool[it].GetFitness() << " ";
216  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
217  vec < fGenePool[it].GetFactors().end(); vec++ ) {
218  Log() << "f_" << n++ << ": " << (*vec) << " ";
219  }
220  Log() << Endl;
221  }
222 }
223 
224 ////////////////////////////////////////////////////////////////////////////////
225 /// make a little printout to the stream "out" of the individuals up to index "untilIndex"
226 /// this means, .. write out the best "untilIndex" individuals.
227 
228 void TMVA::GeneticPopulation::Print( ostream & out, Int_t untilIndex )
229 {
230  for ( unsigned int it = 0; it < fGenePool.size(); ++it ) {
231  Int_t n=0;
232  if (untilIndex >= -1 ) {
233  if (untilIndex == -1 ) return;
234  untilIndex--;
235  }
236  out << "fitness: " << fGenePool[it].GetFitness() << " ";
237  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
238  vec < fGenePool[it].GetFactors().end(); vec++ ) {
239  out << "f_" << n++ << ": " << (*vec) << " ";
240  }
241  out << std::endl;
242  }
243 }
244 
245 ////////////////////////////////////////////////////////////////////////////////
246 /// give back a histogram with the distribution of the coefficients.
247 ///
248 /// Parameters:
249 ///
250 /// - int bins : number of bins of the histogram
251 /// - int min : histogram minimum
252 /// - int max : maximum value of the histogram
253 
255  Int_t min, Int_t max )
256 {
257  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
258 
259  std::stringstream histName;
260  histName.clear();
261  histName.str("v");
262  histName << varNumber;
263  TH1F *hist = new TH1F( histName.str().c_str(),histName.str().c_str(), bins,min,max );
264 
265  return hist;
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////////
269 /// gives back all the values of coefficient "varNumber" of the current generation
270 
271 vector<Double_t> TMVA::GeneticPopulation::VariableDistribution( Int_t /*varNumber*/ )
272 {
273  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
274 
275  vector< Double_t > varDist;
276 
277  return varDist;
278 }
279 
280 ////////////////////////////////////////////////////////////////////////////////
281 /// add another population (strangers) to the one of this GeneticPopulation
282 
284 {
285  for (std::vector<TMVA::GeneticGenes>::iterator it = strangers->fGenePool.begin();
286  it != strangers->fGenePool.end(); it++ ) {
287  GiveHint( it->GetFactors(), it->GetFitness() );
288  }
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// add another population (strangers) to the one of this GeneticPopulation
293 
295 {
296  AddPopulation(&strangers);
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// trim the population to the predefined size
301 
303 {
304  std::sort(fGenePool.begin(), fGenePool.end());
305  while ( fGenePool.size() > (unsigned int) fPopulationSizeLimit )
306  fGenePool.pop_back();
307 }
308 
309 ////////////////////////////////////////////////////////////////////////////////
310 /// add an individual (a set of variables) to the population
311 /// if there is a set of variables which is known to perform good, they can be given as a hint to the population
312 
313 void TMVA::GeneticPopulation::GiveHint( std::vector< Double_t >& hint, Double_t fitness )
314 {
315  TMVA::GeneticGenes g(hint);
316  g.SetFitness(fitness);
317 
318  fGenePool.push_back( g );
319 }
320 
321 ////////////////////////////////////////////////////////////////////////////////
322 /// sort the genepool according to the fitness of the individuals
323 
325 {
326  std::sort(fGenePool.begin(), fGenePool.end());
327 }
328 
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void GiveHint(std::vector< Double_t > &hint, Double_t fitness=0)
add an individual (a set of variables) to the population if there is a set of variables which is know...
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
virtual void SetSeed(ULong_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:207
void MakeChildren()
Creates children out of members of the current generation.
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:567
void MakeCopies(int number)
Produces offspring which is are copies of their parents.
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
void AddPopulation(GeneticPopulation *strangers)
add another population (strangers) to the one of this GeneticPopulation
void Mutate(Double_t probability=20, Int_t startIndex=0, Bool_t near=kFALSE, Double_t spread=0.1, Bool_t mirror=kFALSE)
Mutates the individuals in the genePool.
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
Definition: TRandom.cxx:341
std::vector< TMVA::GeneticRange * > fRanges
Cut optimisation interface class for genetic algorithm.
Definition: GeneticGenes.h:41
virtual ~GeneticPopulation()
destructor
void SetFitness(Double_t fitness)
Definition: GeneticGenes.h:51
GeneticPopulation(const std::vector< TMVA::Interval *> &ranges, Int_t size, UInt_t seed=0)
Constructor.
void TrimPopulation()
trim the population to the predefined size
void SetRandomSeed(UInt_t seed=0)
the random seed of the random generator
unsigned int UInt_t
Definition: RtypesCore.h:42
GeneticGenes * GetGenes(Int_t index)
gives back the "Genes" of the population with the given index.
void Print(Int_t untilIndex=-1)
make a little printout of the individuals up to index "untilIndex" this means, .
#define ClassImp(name)
Definition: Rtypes.h:359
std::vector< TMVA::GeneticGenes > fGenePool
double Double_t
Definition: RtypesCore.h:55
void Sort()
sort the genepool according to the fitness of the individuals
GeneticGenes MakeSex(GeneticGenes male, GeneticGenes female)
this function takes two individuals and produces offspring by mixing (recombining) their coefficients...
std::vector< Double_t > & GetFactors()
Definition: GeneticGenes.h:49
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Definition: TRandom.cxx:627
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Population definition for genetic algorithm.
TH1F * VariableDistribution(Int_t varNumber, Int_t bins, Int_t min, Int_t max)
give back a histogram with the distribution of the coefficients.
MsgLogger & Log() const
Range definition for genetic algorithm.
Definition: GeneticRange.h:42
const Int_t n
Definition: legend1.C:16
static constexpr double g