LcsColumnReader.cpp

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/lucidera/colstore/LcsColumnReader.cpp#16 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 LucidEra, Inc.
00005 // Copyright (C) 2005-2009 The Eigenbase Project
00006 //
00007 // This program is free software; you can redistribute it and/or modify it
00008 // under the terms of the GNU General Public License as published by the Free
00009 // Software Foundation; either version 2 of the License, or (at your option)
00010 // any later version approved by The Eigenbase Project.
00011 //
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 //
00017 // You should have received a copy of the GNU General Public License
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 */
00021 
00022 #include "fennel/common/CommonPreamble.h"
00023 #include "fennel/lucidera/colstore/LcsClusterReader.h"
00024 #include "fennel/lucidera/colstore/LcsColumnReader.h"
00025 #include "fennel/lucidera/colstore/LcsClusterReader.h"
00026 
00027 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsColumnReader.cpp#16 $");
00028 
00029 void LcsColumnReader::sync()
00030 {
00031     // Get batch using column's offset within cluster
00032     pBatch = &pScan->pRangeBatches[colOrd];
00033     pValues = pScan->pLeaf + pBatch->oVal;
00034     pBase = pScan->pLeaf - pScan->delta[colOrd];
00035 
00036     filters.filteringBitmap.resize(0);
00037 
00038     if (batchIsCompressed()) {
00039         // where the bit vectors start
00040         const PBuffer pBit = pValues + (sizeof(uint16_t) * pBatch->nVal);
00041         // # bits per value
00042         uint nBits = calcWidth(pBatch->nVal);
00043         // calculate bit vector widths
00044         iV = bitVecWidth(nBits, width);
00045         bitVecPtr(pBatch->nRow, iV, width, origin, (PBuffer) pBit);
00046 
00047         uint totWidth;
00048         if (iV == 1) {
00049             totWidth = width[0];
00050         } else if (iV == 2) {
00051             totWidth = width[0] + width[1];
00052         } else {
00053             totWidth = 0;
00054         }
00055 
00056         // hack to do one switch statement based on both width arguments
00057         // The switch value is unique for any of the following combos of
00058         // width 1 and 2:
00059         //      (8,-), (8,4), (8,2), (8,1), (4,-), (4,2), (4,1), (2,-), (2,1),
00060         //      (1,-)
00061         //
00062 
00063         switch (totWidth) {
00064         // single vector
00065         case 16:    // width 1 = 16
00066             pFuncReadBitVec = readBitVec16;
00067             break;
00068 
00069         case 8: // width 1 = 8
00070             pFuncReadBitVec = readBitVec8;
00071             break;
00072 
00073         case 4: // width 1 = 4
00074             pFuncReadBitVec = readBitVec4;
00075             break;
00076 
00077         case 2: // width 1 = 2
00078             pFuncReadBitVec = readBitVec2;
00079             break;
00080 
00081         case 1: // width 1 = 1
00082             pFuncReadBitVec = readBitVec1;
00083             break;
00084 
00085         // dual vector, first vector 8
00086 
00087         case 12:    // width 1 = 8, width 2 = 4
00088             pFuncReadBitVec = readBitVec12;
00089             break;
00090 
00091         case 10:    // width 1 = 8, width 2 = 2
00092             pFuncReadBitVec = readBitVec10;
00093             break;
00094 
00095         case 9: // width 1 = 8, width 2 = 1
00096             pFuncReadBitVec = readBitVec9;
00097             break;
00098 
00099         // dual vector, first vector 4
00100 
00101         case 6: // width 1 = 4, width 2 = 2
00102             pFuncReadBitVec = readBitVec6;
00103             break;
00104 
00105         case 5: // width 1 = 4, width 2 = 1
00106             pFuncReadBitVec = readBitVec5;
00107             break;
00108 
00109         // dual vector, first vector is 2
00110 
00111         case 3: // width 1 = 2, width 2 = 1
00112             pFuncReadBitVec = readBitVec3;
00113             break;
00114 
00115         // no bit vector stored
00116         case 0:
00117             pFuncReadBitVec = readBitVec0;
00118             break;
00119 
00120         default:
00121             assert(false);
00122             break;
00123         }
00124 
00125         // Set function pointer to get data
00126         pGetCurrentValueFunc = &LcsColumnReader::getCompressedValue;
00127 
00128         if (filters.hasResidualFilters) {
00129             /*
00130              * initializes bitmap
00131              */
00132             buildContainsMap();
00133         }
00134 
00135     } else if (batchIsFixed()) {
00136         // Set function pointer to get data in fixed case
00137         pGetCurrentValueFunc = &LcsColumnReader::getFixedValue;
00138     } else {
00139         // Set function pointer to get data in variable case
00140         pGetCurrentValueFunc = &LcsColumnReader::getVariableValue;
00141     }
00142 }
00143 
00144 const PBuffer LcsColumnReader::getCompressedValue()
00145 {
00146     return getBatchValue(getCurrentValueCode());
00147 }
00148 
00149 const PBuffer LcsColumnReader::getFixedValue()
00150 {
00151     return (const PBuffer)(pValues + (pScan->getRangePos() * pBatch->recSize));
00152 }
00153 
00154 const PBuffer LcsColumnReader::getVariableValue()
00155 {
00156     return (const PBuffer) (getBatchBase() +
00157         getBatchOffsets()[pScan->getRangePos()]);
00158 }
00159 
00160 void LcsColumnReader::readCompressedBatch(
00161     uint count, uint16_t *pValCodes, uint *pActCount)
00162 {
00163     *pActCount = std::min(count, pScan->getRangeRowsLeft());
00164     readBitVecs(
00165         pValCodes, iV, width, origin, pScan->getRangePos(), *pActCount);
00166 }
00167 
00168 uint16_t LcsColumnReader::getCurrentValueCode() const
00169 {
00170     assert(batchIsCompressed());
00171     uint16_t nValCode;
00172     pFuncReadBitVec(&nValCode, origin, pScan->getRangePos());
00173     return nValCode;
00174 }
00175 
00176 bool LcsColumnReader::applyFilters(
00177     TupleDescriptor &projDescriptor, TupleData &outputTupleData)
00178 {
00179     if (!filters.filteringBitmap.empty()) {
00180         /*
00181          * bitmap filtering
00182          */
00183         return (filters.filteringBitmap.test(getCurrentValueCode()));
00184     }
00185 
00186     for (uint k = 0; k < filters.filterData.size(); k++) {
00187         LcsResidualFilter *filter = filters.filterData[k].get();
00188 
00189         if (filter->lowerBoundDirective != SEARCH_UNBOUNDED_LOWER) {
00190             int c = filters.inputKeyDesc.compareTuples(
00191                 filter->boundData, filters.lowerBoundProj,
00192                 outputTupleData, filters.readerKeyProj);
00193 
00194             if (filter->lowerBoundDirective == SEARCH_CLOSED_LOWER) {
00195                 if (c > 0) {
00196                     continue;
00197                 }
00198             } else {
00199                 if (c >= 0) {
00200                     continue;
00201                 }
00202             }
00203         }
00204 
00205         if (filter->upperBoundDirective == SEARCH_UNBOUNDED_UPPER) {
00206             return true;
00207         }
00208 
00209         int c = filters.inputKeyDesc.compareTuples(
00210             filter->boundData, filters.upperBoundProj,
00211             outputTupleData, filters.readerKeyProj);
00212 
00213         if (filter->upperBoundDirective == SEARCH_CLOSED_UPPER) {
00214             if (c >= 0) {
00215                 return true;
00216             }
00217         } else {
00218             if (c > 0) {
00219                 return true;
00220             }
00221         }
00222     }
00223 
00224     return false;
00225 }
00226 
00227 uint LcsColumnReader::findVal(
00228     uint filterPos,
00229     bool highBound,
00230     bool bStrict,
00231     TupleDataWithBuffer &readerKeyData)
00232 {
00233     // REVIEW jvs 5-Sept-2006:  It would be nice to use std::lower_bound
00234     // and std::upper_bound instead of reimplementing binary search.
00235     // This is low priority because it takes a bit of messing around
00236     // to get iterators and functors in a form STL likes, and because
00237     // (I'm assuming) the code below was taken straight from Broadbase,
00238     // where it was already heavily exercised.
00239 
00240     uint iLo = 0, iHi = getBatchValCount(), iResult;
00241     int cmp = 0;
00242     TupleProjection &boundProj = highBound ?
00243         filters.upperBoundProj : filters.lowerBoundProj;
00244 
00245     // If nVals == 0, then iLo == iHi == 0, and we return 0.
00246     while (iLo < iHi) {
00247         uint iMid = (iLo + iHi) / 2;
00248 
00249         filters.attrAccessor.loadValue(
00250             readerKeyData[0],
00251             getBatchValue(iMid));
00252 
00253         cmp = filters.inputKeyDesc.compareTuples(
00254             readerKeyData, allProj,
00255             filters.filterData[filterPos]->boundData, boundProj);
00256 
00257         // reset datum pointers in case tuple just read contained nulls
00258         readerKeyData.resetBuffer();
00259 
00260         if (cmp == 0) {
00261             if (bStrict && !highBound) {
00262                 iResult = iMid + 1;
00263             } else {
00264                 if (!bStrict && highBound) {
00265                     iResult = iMid + 1;
00266                 } else {
00267                     iResult = iMid;
00268                 }
00269             }
00270             return iResult;
00271         } else if (cmp > 0) {
00272             iHi = iMid;
00273         } else {
00274             iLo = iMid + 1;
00275         }
00276     }
00277 
00278     // We now know that no val[i] == key, so we don't worry about strictness.
00279     assert(iLo == iHi);
00280     if (cmp < 0) {                  // key < val[iMid]
00281         iResult = iHi;
00282     } else {
00283         // val[iMid] < key, so key < val[iMid+1]
00284         iResult = iLo;
00285     }
00286 
00287     return iResult;
00288 }
00289 
00290 void LcsColumnReader::findBounds(
00291     uint filterPos,
00292     uint &nLoVal,
00293     uint &nHiVal,
00294     TupleDataWithBuffer &readerKeyData)
00295 {
00296     LcsResidualFilter *filter = filters.filterData[filterPos].get();
00297     bool getLowerSet = filter->lowerBoundDirective != SEARCH_UNBOUNDED_LOWER;
00298     bool getLowerBoundStrict =
00299         filter->lowerBoundDirective != SEARCH_CLOSED_LOWER;
00300     bool getUpperSet = filter->upperBoundDirective != SEARCH_UNBOUNDED_UPPER;
00301     bool getUpperBoundStrict =
00302         filter->upperBoundDirective != SEARCH_CLOSED_UPPER;
00303 
00304     nLoVal =
00305         getLowerSet
00306         ? findVal(filterPos, false, getLowerBoundStrict, readerKeyData)
00307         : 0;
00308     nHiVal =
00309         getUpperSet
00310         ? findVal(filterPos, true, getUpperBoundStrict, readerKeyData)
00311         : getBatchValCount();
00312 }
00313 
00314 void LcsColumnReader::buildContainsMap()
00315 {
00316     uint nVals = getBatchValCount();
00317 
00318     filters.filteringBitmap.resize(nVals);
00319 
00320     for (uint i = 0; i < filters.filterData.size(); i++) {
00321         uint nLoVal, nHiVal;
00322 
00323         findBounds(i, nLoVal, nHiVal, filters.readerKeyData);
00324 
00325         for (uint b = nLoVal; b < nHiVal; b++) {
00326             filters.filteringBitmap.set(b);
00327         }
00328     }
00329 }
00330 
00331 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsColumnReader.cpp#16 $");
00332 
00333 // End LcsColumnReader.cpp

Generated on Mon Jun 22 04:00:19 2009 for Fennel by  doxygen 1.5.1