00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef Fennel_SamplingExecStreamGenerator_Included
00023 #define Fennel_SamplingExecStreamGenerator_Included
00024
00025 #include "fennel/test/ExecStreamGenerator.h"
00026 #include "fennel/lucidera/colstore/LcsRowScanExecStream.h"
00027 #include <math.h>
00028
00029 FENNEL_BEGIN_NAMESPACE
00030
00037 class BernoulliSamplingExecStreamGenerator
00038 : public MockProducerExecStreamGenerator
00039 {
00040 protected:
00041 boost::shared_ptr<MockProducerExecStreamGenerator> generator;
00042
00043 boost::scoped_ptr<BernoulliRng> rng;
00044
00045 uint nColumns;
00046 uint iChildRow;
00047 uint iLastRow;
00048 public:
00049 explicit BernoulliSamplingExecStreamGenerator(
00050 boost::shared_ptr<MockProducerExecStreamGenerator> const &generatorInit,
00051 float prob, uint seed, uint nColumnsInit)
00052 : generator(generatorInit),
00053 rng(new BernoulliRng(prob)),
00054 nColumns(nColumnsInit),
00055 iChildRow((uint) -1),
00056 iLastRow((uint) -1)
00057 {
00058 rng->reseed(seed);
00059 }
00060
00061 virtual int64_t generateValue(uint iRow, uint iCol)
00062 {
00063 if (iRow != iLastRow) {
00064 assert(iCol == 0);
00065
00066 iChildRow++;
00067 while (!rng->nextValue()) {
00068 for (int i = 0; i < nColumns; i++) {
00069 generator->generateValue(iChildRow, i);
00070 }
00071 iChildRow++;
00072 }
00073 iLastRow = iRow;
00074 }
00075
00076 return generator->generateValue(iChildRow, iCol);
00077 }
00078 };
00079
00080 class SystemSamplingExecStreamGenerator
00081 : public MockProducerExecStreamGenerator
00082 {
00083 protected:
00084 boost::shared_ptr<MockProducerExecStreamGenerator> generator;
00085
00086 uint nColumns;
00087 uint iChildRow;
00088 uint iLastRow;
00089
00090 uint clumpSize;
00091 uint clumpDistance;
00092 uint clumpPos;
00093
00094 public:
00095 explicit SystemSamplingExecStreamGenerator(
00096 boost::shared_ptr<MockProducerExecStreamGenerator> const &generatorInit,
00097 float rate, uint nRows, uint nColumnsInit, uint nClumps)
00098 : generator(generatorInit),
00099 nColumns(nColumnsInit),
00100 iChildRow((uint) -1),
00101 iLastRow((uint) -1),
00102 clumpPos((uint) -1)
00103 {
00104 uint sampleSize = (uint)round((double)nRows * (double)rate);
00105 clumpSize = (uint)round((double)sampleSize / (double)nClumps);
00106 clumpDistance =
00107 (uint)round((double)(nRows - sampleSize) / (double)(nClumps - 1));
00108
00109 uint rowsRequired =
00110 (clumpSize + clumpDistance) * (nClumps - 1) + clumpSize;
00111 if (rowsRequired > nRows && clumpDistance > 0) {
00112 clumpDistance--;
00113 }
00114
00115
00116
00117
00118 }
00119
00120 virtual int64_t generateValue(uint iRow, uint iCol)
00121 {
00122 if (iRow != iLastRow) {
00123 assert(iCol == 0);
00124
00125 iChildRow++;
00126 clumpPos++;
00127
00128 if (clumpPos >= clumpSize) {
00129
00130 for (uint i = 0; i < clumpDistance; i++) {
00131
00132 for (int j = 0; j < nColumns; j++) {
00133 generator->generateValue(iChildRow, j);
00134 }
00135 iChildRow++;
00136 }
00137 clumpPos = 0;
00138 }
00139 iLastRow = iRow;
00140
00141
00142 }
00143
00144 return generator->generateValue(iChildRow, iCol);
00145 }
00146 };
00147
00148 FENNEL_END_NAMESPACE
00149
00150 #endif
00151
00152