SamplingExecStreamGenerator.h

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/lucidera/test/SamplingExecStreamGenerator.h#4 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2007-2009 LucidEra, Inc.
00005 // Copyright (C) 2007-2009 The Eigenbase Project
00006 //
00007 // This program is free software; you can redistribute it and/or modify it
00008 // under the terms of the GNU General Public License as published by the Free
00009 // Software Foundation; either version 2 of the License, or (at your option)
00010 // any later version approved by The Eigenbase Project.
00011 //
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 //
00017 // You should have received a copy of the GNU General Public License
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 */
00021 
00022 #ifndef Fennel_SamplingExecStreamGenerator_Included
00023 #define Fennel_SamplingExecStreamGenerator_Included
00024 
00025 #include "fennel/test/ExecStreamGenerator.h"
00026 #include "fennel/lucidera/colstore/LcsRowScanExecStream.h"
00027 #include <math.h>
00028 
00029 FENNEL_BEGIN_NAMESPACE
00030 
00037 class BernoulliSamplingExecStreamGenerator
00038     : public MockProducerExecStreamGenerator
00039 {
00040 protected:
00041     boost::shared_ptr<MockProducerExecStreamGenerator> generator;
00042 
00043     boost::scoped_ptr<BernoulliRng> rng;
00044 
00045     uint nColumns;
00046     uint iChildRow;
00047     uint iLastRow;
00048 public:
00049     explicit BernoulliSamplingExecStreamGenerator(
00050         boost::shared_ptr<MockProducerExecStreamGenerator> const &generatorInit,
00051         float prob, uint seed, uint nColumnsInit)
00052         : generator(generatorInit),
00053           rng(new BernoulliRng(prob)),
00054           nColumns(nColumnsInit),
00055           iChildRow((uint) -1),
00056           iLastRow((uint) -1)
00057     {
00058         rng->reseed(seed);
00059     }
00060 
00061     virtual int64_t generateValue(uint iRow, uint iCol)
00062     {
00063         if (iRow != iLastRow) {
00064             assert(iCol == 0);
00065 
00066             iChildRow++;
00067             while (!rng->nextValue()) {
00068                 for (int i = 0; i < nColumns; i++) {
00069                     generator->generateValue(iChildRow, i);
00070                 }
00071                 iChildRow++;
00072             }
00073             iLastRow = iRow;
00074         }
00075 
00076         return generator->generateValue(iChildRow, iCol);
00077     }
00078 };
00079 
00080 class SystemSamplingExecStreamGenerator
00081     : public MockProducerExecStreamGenerator
00082 {
00083 protected:
00084     boost::shared_ptr<MockProducerExecStreamGenerator> generator;
00085 
00086     uint nColumns;
00087     uint iChildRow;
00088     uint iLastRow;
00089 
00090     uint clumpSize;
00091     uint clumpDistance;
00092     uint clumpPos;
00093 
00094 public:
00095     explicit SystemSamplingExecStreamGenerator(
00096         boost::shared_ptr<MockProducerExecStreamGenerator> const &generatorInit,
00097         float rate, uint nRows, uint nColumnsInit, uint nClumps)
00098         : generator(generatorInit),
00099           nColumns(nColumnsInit),
00100           iChildRow((uint) -1),
00101           iLastRow((uint) -1),
00102           clumpPos((uint) -1)
00103     {
00104         uint sampleSize = (uint)round((double)nRows * (double)rate);
00105         clumpSize = (uint)round((double)sampleSize / (double)nClumps);
00106         clumpDistance =
00107             (uint)round((double)(nRows - sampleSize) / (double)(nClumps - 1));
00108 
00109         uint rowsRequired =
00110             (clumpSize + clumpDistance) * (nClumps - 1) + clumpSize;
00111         if (rowsRequired > nRows && clumpDistance > 0) {
00112             clumpDistance--;
00113         }
00114 
00115 //        std::cout << "sampleSize " << sampleSize << std::endl;
00116 //        std::cout << "clumpSize " << clumpSize << std::endl;
00117 //        std::cout << "clumpDistance " << clumpDistance << std::endl;
00118     }
00119 
00120     virtual int64_t generateValue(uint iRow, uint iCol)
00121     {
00122         if (iRow != iLastRow) {
00123             assert(iCol == 0);
00124 
00125             iChildRow++;
00126             clumpPos++;
00127 
00128             if (clumpPos >= clumpSize) {
00129                 // Skip clumpDistance rows
00130                 for (uint i = 0; i < clumpDistance; i++) {
00131 //                    std::cout << "skip " << iChildRow << std::endl;
00132                     for (int j = 0; j < nColumns; j++) {
00133                         generator->generateValue(iChildRow, j);
00134                     }
00135                     iChildRow++;
00136                 }
00137                 clumpPos = 0;
00138             }
00139             iLastRow = iRow;
00140 
00141 //            std::cout << "gen " << iChildRow << std::endl;
00142         }
00143 
00144         return generator->generateValue(iChildRow, iCol);
00145     }
00146 };
00147 
00148 FENNEL_END_NAMESPACE
00149 
00150 #endif
00151 
00152 // End SamplingExecStreamGenerator.h

Generated on Mon Jun 22 04:00:20 2009 for Fennel by  doxygen 1.5.1