Honeycomb  0.1
Component-Model Framework
Bootstrap.h
Go to the documentation of this file.
1 // Honeycomb, Copyright (C) 2015 NewGamePlus Inc. Distributed under the Boost Software License v1.0.
2 #pragma once
3 
5 
6 namespace honey
7 {
8 
10 
26 template<class SampleT, sdt Dim = 1, class Real__ = Real>
27 class Bootstrap
28 {
29 public:
30  typedef Real__ Real;
31 private:
32  typedef typename Numeral<Real>::Real_ Real_;
33  typedef typename Real_::DoubleType Double_;
34  typedef typename Double_::Real Double;
35  typedef Alge_<Real> Alge;
36  typedef Interp_<Real> Interp;
37  typedef Gaussian_<Real> Gaussian;
38 
39 public:
40  static const sdt dim = Dim;
41  typedef Vec<dim,Real> Vec;
42  typedef vector<const SampleT*> SampleList;
43  typedef function<Vec (const SampleList&)> Func;
44 
46 
59  Bootstrap(const Func& func, RandomGen& gen, const vector<SampleT>& samples, Real alpha = 0.05, szt bootSampleCount = 50) :
60  func(func), gen(gen), samples(samples), alpha(alpha), bootSampleCount(bootSampleCount),
61  _lower(0), _upper(0), _progress(0), sampleTotal(0), idx(0),
62  bootRes(nullptr), jackRes(nullptr), jackMean(0)
63  {
64  if (samples.size() == 0) return;
65  bootRes.set(new Vec[bootSampleCount]);
66  bootSamples.resize(samples.size());
67  jackRes.set(new Vec[samples.size()]);
68  }
69 
71 
73 
79  void calc(Real progressDelta = 1);
80 
82  Real progress() const { return _progress; }
84  const Vec& lower() const { return _lower; }
86  const Vec& upper() const { return _upper; }
87 
89  struct meanFunc
90  {
91  Vec operator()(const SampleList& samples)
92  {
93  Vec sum = reduce(samples, Vec().fromZero(), [](const Vec& a, const SampleT* e) { return a + *e; });
94  return samples.size() > 0 ? sum / (Real)samples.size() : Vec().fromZero();
95  }
96  };
97 
99  struct varianceFunc
100  {
101  Vec operator()(const SampleList& samples)
102  {
103  Vec mean = meanFunc()(samples);
104  Vec sumDev = reduce(samples, Vec().fromZero(), [&](const Vec& a, const SampleT* e) -> Real { return a + (*e - mean).elemSqr(); });
105  return samples.size() > 1 ? sumDev / (Real)(samples.size()-1) : Vec().fromZero();
106  }
107  };
108 
109 private:
110  //Input
111  Func func;
112  RandomGen& gen;
113  const vector<SampleT>& samples;
114  Real alpha;
115  szt bootSampleCount;
116 
117  //Output
118  Vec _lower;
119  Vec _upper;
120 
121  //Progress
122  Real _progress;
123  sdt sampleTotal;
124  sdt idx;
125 
126  //Calc locals
127  UniquePtr<Vec[]> bootRes;
128  SampleList bootSamples;
129  Vec origRes;
130  UniquePtr<Vec[]> jackRes;
131  Vec jackMean;
132 };
133 
134 template<class SampleT, sdt Dim, class Real>
136 {
137  if (_progress == 1)
138  return;
139 
140  _progress = Alge::min(_progress + progressDelta, 1);
141  sdt sampleAcc = _progress*(bootSampleCount + samples.size()) - sampleTotal;
142  sampleTotal += sampleAcc;
143 
144  if (samples.size() == 0)
145  return;
146 
147  sdt sampleCount = 0;
148 
149  if (sampleTotal - sampleAcc < bootSampleCount)
150  {
151  //Get function result with bootstrap samples (random sampling with replacement)
152  for (; idx < bootSampleCount; ++idx, ++sampleCount)
153  {
154  if (sampleCount >= sampleAcc)
155  return;
156  //Build bootstrap samples
157  for (szt i = 0; i < samples.size(); ++i)
158  bootSamples[i] = &samples[Discrete_d(gen, 0, samples.size()-1).next()];
159  //Add bootstrap result
160  bootRes[idx] = func(const_cast<const SampleList&>(bootSamples));
161  }
162 
163  //Get function result with original sample data
164  for (szt i = 0; i < samples.size(); ++i)
165  bootSamples[i] = &samples[i];
166  origRes = func(const_cast<const SampleList&>(bootSamples));
167 
168  idx = 0;
169  }
170 
171  //Get function result with jackknife samples (every sample is omitted once)
172  //and calc the jackknife estimator (mean), which is used to estimate bias/variance
173  for (; idx < samples.size(); ++idx, ++sampleCount)
174  {
175  if (sampleCount >= sampleAcc)
176  return;
177  //Erase one sample
178  bootSamples.erase(bootSamples.begin() + idx);
179  //Add jackknife result
180  jackRes[idx] = func(const_cast<const SampleList&>(bootSamples));
181  jackMean += jackRes[idx];
182  //Re-insert sample
183  bootSamples.insert(bootSamples.begin() + idx, &samples[idx]);
184  }
185  jackMean /= Real(samples.size());
186 
187  vector<Real> ecdf(bootSampleCount);
188 
189  for (sdt i = 0; i < Vec::s_size; ++i)
190  {
191  //Bias, also build empirical cdf (sorted boot results)
192  Real sumLess = 0;
193  Real sumEq = 0;
194  for (szt j = 0; j < bootSampleCount; ++j)
195  {
196  Real val = bootRes[j][i];
197  if (val < origRes[i])
198  sumLess++;
199  else if (val == origRes[i])
200  sumEq++;
201  ecdf[j] = val;
202  }
203  Real z = Gaussian().cdfInv((sumLess + sumEq/2) / bootSampleCount);
204  std::sort(ecdf.begin(), ecdf.end());
205 
206  //Acceleration
207  Real sumDevSqr = 0;
208  Real sumDevCube = 0;
209  for (szt j = 0; j < samples.size(); ++j)
210  {
211  Real dev = jackMean[i] - jackRes[j][i];
212  Real sqr = Alge::sqr(dev);
213  sumDevSqr += sqr;
214  sumDevCube += sqr*dev;
215  }
216  Real acc = (sumDevCube / Alge::pow(sumDevSqr, 1.5)) / 6;
217 
218  //Apply bias+acc correction
219  Real za = Gaussian().cdfInv(alpha / 2);
220  Real idxLower = Gaussian().cdf(z + (z+za) / (1-acc*(z+za))) * (ecdf.size()-1);
221  Real idxUpper = Gaussian().cdf(z + (z-za) / (1-acc*(z-za))) * (ecdf.size()-1);
222 
223  //Empirical cdf, interpolate fractional index
224  _lower[i] = idxLower < ecdf.size()-1 ? Interp::linear(Alge::frac(idxLower), ecdf[idxLower], ecdf[idxLower+1]) : ecdf[idxLower];
225  _upper[i] = idxUpper < ecdf.size()-1 ? Interp::linear(Alge::frac(idxUpper), ecdf[idxUpper], ecdf[idxUpper+1]) : ecdf[idxUpper];
226  }
227 
228  //Calculation is complete
229  _progress = 1;
230 }
231 
232 }
233 
Real progress() const
Current progress of calculation, from 0 (start) to 1 (complete).
Definition: Bootstrap.h:82
static Real pow(Real x, Real y)
x raised to exponent y
Definition: Alge.h:73
Algebra.
Definition: Alge.h:13
Random number generator interface.
Definition: Gen.h:11
Gaussian_< Real > Gaussian
Definition: Gaussian.h:56
~Bootstrap()
Definition: Bootstrap.h:70
static Real frac(Real x)
Remove the whole part, leaving just the fraction.
Definition: Alge.h:42
Vec< dim, Real > Vec
Definition: Bootstrap.h:41
ptrdiff_t sdt
Size difference type, shorthand for ptrdiff_t.
Definition: Core.h:92
Interpolation math.
Definition: Quat.h:10
Vec operator()(const SampleList &samples)
Definition: Bootstrap.h:91
Accum reduce(Range &&, Seqs &&..., Accum &&initVal, Func &&)
Accumulate a series of sequences into an output.
static const sdt dim
Definition: Bootstrap.h:40
static std::common_type< Num, Num2 >::type min(Num a, Num2 b)
Get the minimum of two numbers.
Definition: Alge.h:89
vector< const SampleT * > SampleList
Definition: Bootstrap.h:42
function< Vec(const SampleList &)> Func
Definition: Bootstrap.h:43
Bootstrap(const Func &func, RandomGen &gen, const vector< SampleT > &samples, Real alpha=0.05, szt bootSampleCount=50)
Constructor, set up constants for all calculation calls.
Definition: Bootstrap.h:59
float Real
Real number type. See Real_ for real number operations and constants.
Definition: Real.h:21
Numeric type information, use numeral() to get instance safely from a static context.
Definition: Numeral.h:17
size_t szt
Size type, shorthand for size_t.
Definition: Core.h:90
Discrete_< int64 > Discrete_d
Definition: Discrete.h:52
MatrixS & fromZero()
Zero all elements.
Definition: Base.h:70
void calc(Real progressDelta=1)
Perform bootstrap calculation. The calculation can be split up over multiple calls.
Definition: Bootstrap.h:135
double Real
Definition: Real.h:14
const Vec & lower() const
Get lower bound of confidence interval (calculation must be complete)
Definition: Bootstrap.h:84
Vec operator()(const SampleList &samples)
Definition: Bootstrap.h:101
static T linear(Real t, const T &a, const T &b)
Linear interpolation. t range is [0,1].
Definition: Interp.h:28
static Num sqr(Num x)
Square.
Definition: Alge.h:60
Real__ Real
Definition: Bootstrap.h:30
const Vec & upper() const
Get upper bound of confidence interval (calculation must be complete)
Definition: Bootstrap.h:86
Functor to estimate the sample variance. This is the unbiased estimator (mean over all possible sampl...
Definition: Bootstrap.h:99
Monte Carlo (random-based) method to estimate the interval of confidence in a function's result...
Definition: Bootstrap.h:27
Pointer to a unique, non-shared, object. Finalizer is run upon destruction (deletes object by default...
Definition: SharedPtr.h:164
Generate a normally (Gaussian) distributed random variate.
Definition: Gaussian.h:26
Functor to estimate the sample mean.
Definition: Bootstrap.h:89
Global Honeycomb namespace.