#include <LcsColumnReader.h>
Public Member Functions | |
void | init (LcsClusterReader *pScanInit, uint colOrdInit) |
Initializes a scan of column "colOrdInit". | |
void | sync () |
Synchronizes batches for each column when the scan moves to a new range. | |
bool | batchIsCompressed () const |
Returns true if current batch for column is compressed. | |
bool | batchIsFixed () const |
Returns true if current batch for column is fixed. | |
const PBuffer | getCurrentValue () |
Returns the current value of the column, using the retrieval function selected by sync() for the batch mode. | |
uint16_t | getCurrentValueCode () const |
Gets the code (between 0 and getBatchValCount() - 1) of the current value for a compressed batch. | |
uint | getBatchValCount () const |
Returns number of distinct values in batch. | |
const PBuffer | getBatchBase () const |
Returns base pointer of batch. | |
const uint16_t * | getBatchOffsets () const |
Returns table of offsets from base. | |
const PBuffer | getBatchValue (uint iValCode) const |
Gets iValCode-th value. | |
void | readCompressedBatch (uint count, uint16_t *pValCodes, uint *pActCount) |
Reads up to "count" value-codes into the "pValCodes" table. | |
LcsResidualColumnFilters & | getFilters () |
Returns the filters associated with this column. | |
bool | applyFilters (TupleDescriptor &projDescriptor, TupleData &outputTupleData) |
Applies the filters. | |
Private Member Functions | |
const PBuffer | getCompressedValue () |
Returns value from compressed batch. | |
const PBuffer | getFixedValue () |
Returns value from fixed batch. | |
const PBuffer | getVariableValue () |
Returns value from variable batch. | |
uint | findVal (uint filterPos, bool highBound, bool bStrict, TupleDataWithBuffer &readerKeyData) |
Locates the smallest value in the compressed batch that's greater than or equal to a filter predicate's bound. | |
void | findBounds (uint filterPos, uint &nLoVal, uint &nHiVal, TupleDataWithBuffer &readerKeyData) |
Locates the range of entries in the compressed batch that passes a filter predicate. | |
void | buildContainsMap () |
Builds the contains bitmap for compressed batch. | |
Private Attributes | |
LcsClusterReader * | pScan |
Parent cluster reader object. | |
uint | colOrd |
Ordinal of column in cluster (0-based). | |
PLcsBatchDir | pBatch |
Batch corresponding to column. | |
PBuffer | pValues |
Values of the batch. | |
PBuffer | pBase |
Base address from which offsets are stored. | |
PtrVec | origin |
Pointers to the origin of each bit vector. | |
WidthVec | width |
Width of bit vectors. | |
uint | iV |
Number of bit vectors in the current compressed batch (set by sync() from bitVecWidth()). | |
PBitVecFuncPtr | pFuncReadBitVec |
Pointer to appropriate bit vector read function. | |
const PBuffer(LcsColumnReader::* | pGetCurrentValueFunc )() |
Pointer to the member function that retrieves the current value of the column; sync() points it at the compressed, fixed, or variable accessor to match the batch mode. | |
LcsResidualColumnFilters | filters |
Filters associated with this column. | |
TupleProjection | allProj |
Projection for readerKeyData. |
Definition at line 35 of file LcsColumnReader.h.
const PBuffer LcsColumnReader::getCompressedValue | ( | ) | [private] |
Returns value from compressed batch.
Definition at line 144 of file LcsColumnReader.cpp.
References getBatchValue(), and getCurrentValueCode().
Referenced by sync().
00145 { 00146 return getBatchValue(getCurrentValueCode()); 00147 }
const PBuffer LcsColumnReader::getFixedValue | ( | ) | [private] |
Returns value from fixed batch.
Definition at line 149 of file LcsColumnReader.cpp.
References LcsClusterReader::getRangePos(), pBatch, pScan, pValues, and LcsBatchDir::recSize.
Referenced by sync().
const PBuffer LcsColumnReader::getVariableValue | ( | ) | [private] |
Returns value from variable batch.
Definition at line 154 of file LcsColumnReader.cpp.
References getBatchBase(), getBatchOffsets(), LcsClusterReader::getRangePos(), and pScan.
Referenced by sync().
00155 { 00156 return (const PBuffer) (getBatchBase() + 00157 getBatchOffsets()[pScan->getRangePos()]); 00158 }
uint LcsColumnReader::findVal | ( | uint | filterPos, | |
bool | highBound, | |||
bool | bStrict, | |||
TupleDataWithBuffer & | readerKeyData | |||
) | [private] |
Locates the smallest value in the compressed batch that's greater than or equal to a filter predicate's bound.
filterPos | index into filters.filterData | |
highBound | true iff called for upper bound data | |
bStrict | if true, find the entry greater than the filter predicate's bound | |
readerKeyData | TupleData used for comparison |
Definition at line 227 of file LcsColumnReader.cpp.
References allProj, LcsResidualColumnFilters::attrAccessor, TupleDescriptor::compareTuples(), LcsResidualColumnFilters::filterData, filters, getBatchValCount(), getBatchValue(), LcsResidualColumnFilters::inputKeyDesc, UnalignedAttributeAccessor::loadValue(), LcsResidualColumnFilters::lowerBoundProj, TupleDataWithBuffer::resetBuffer(), and LcsResidualColumnFilters::upperBoundProj.
Referenced by findBounds().
00232 { 00233 // REVIEW jvs 5-Sept-2006: It would be nice to use std::lower_bound 00234 // and std::upper_bound instead of reimplementing binary search. 00235 // This is low priority because it takes a bit of messing around 00236 // to get iterators and functors in a form STL likes, and because 00237 // (I'm assuming) the code below was taken straight from Broadbase, 00238 // where it was already heavily exercised. 00239 00240 uint iLo = 0, iHi = getBatchValCount(), iResult; 00241 int cmp = 0; 00242 TupleProjection &boundProj = highBound ? 00243 filters.upperBoundProj : filters.lowerBoundProj; 00244 00245 // If nVals == 0, then iLo == iHi == 0, and we return 0. 00246 while (iLo < iHi) { 00247 uint iMid = (iLo + iHi) / 2; 00248 00249 filters.attrAccessor.loadValue( 00250 readerKeyData[0], 00251 getBatchValue(iMid)); 00252 00253 cmp = filters.inputKeyDesc.compareTuples( 00254 readerKeyData, allProj, 00255 filters.filterData[filterPos]->boundData, boundProj); 00256 00257 // reset datum pointers in case tuple just read contained nulls 00258 readerKeyData.resetBuffer(); 00259 00260 if (cmp == 0) { 00261 if (bStrict && !highBound) { 00262 iResult = iMid + 1; 00263 } else { 00264 if (!bStrict && highBound) { 00265 iResult = iMid + 1; 00266 } else { 00267 iResult = iMid; 00268 } 00269 } 00270 return iResult; 00271 } else if (cmp > 0) { 00272 iHi = iMid; 00273 } else { 00274 iLo = iMid + 1; 00275 } 00276 } 00277 00278 // We now know that no val[i] == key, so we don't worry about strictness. 00279 assert(iLo == iHi); 00280 if (cmp < 0) { // key < val[iMid] 00281 iResult = iHi; 00282 } else { 00283 // val[iMid] < key, so key < val[iMid+1] 00284 iResult = iLo; 00285 } 00286 00287 return iResult; 00288 }
void LcsColumnReader::findBounds | ( | uint | filterPos, | |
uint & | nLoVal, | |||
uint & | nHiVal, | |||
TupleDataWithBuffer & | readerKeyData | |||
) | [private] |
Locates the range of entries in the compressed batch that passes a filter predicate.
filterPos | index into filters.filterData | |
[out] | nLoVal | index of the lower bound |
[out] | nHiVal | index of the upper bound |
readerKeyData | TupleData used for comparison |
Definition at line 290 of file LcsColumnReader.cpp.
References LcsResidualColumnFilters::filterData, filters, findVal(), getBatchValCount(), LcsResidualFilter::lowerBoundDirective, SEARCH_CLOSED_LOWER, SEARCH_CLOSED_UPPER, SEARCH_UNBOUNDED_LOWER, SEARCH_UNBOUNDED_UPPER, and LcsResidualFilter::upperBoundDirective.
Referenced by buildContainsMap().
00295 { 00296 LcsResidualFilter *filter = filters.filterData[filterPos].get(); 00297 bool getLowerSet = filter->lowerBoundDirective != SEARCH_UNBOUNDED_LOWER; 00298 bool getLowerBoundStrict = 00299 filter->lowerBoundDirective != SEARCH_CLOSED_LOWER; 00300 bool getUpperSet = filter->upperBoundDirective != SEARCH_UNBOUNDED_UPPER; 00301 bool getUpperBoundStrict = 00302 filter->upperBoundDirective != SEARCH_CLOSED_UPPER; 00303 00304 nLoVal = 00305 getLowerSet 00306 ? findVal(filterPos, false, getLowerBoundStrict, readerKeyData) 00307 : 0; 00308 nHiVal = 00309 getUpperSet 00310 ? findVal(filterPos, true, getUpperBoundStrict, readerKeyData) 00311 : getBatchValCount(); 00312 }
void LcsColumnReader::buildContainsMap | ( | ) | [private] |
Builds the contains bitmap for compressed batch.
Definition at line 314 of file LcsColumnReader.cpp.
References LcsResidualColumnFilters::filterData, LcsResidualColumnFilters::filteringBitmap, filters, findBounds(), getBatchValCount(), and LcsResidualColumnFilters::readerKeyData.
Referenced by sync().
00315 { 00316 uint nVals = getBatchValCount(); 00317 00318 filters.filteringBitmap.resize(nVals); 00319 00320 for (uint i = 0; i < filters.filterData.size(); i++) { 00321 uint nLoVal, nHiVal; 00322 00323 findBounds(i, nLoVal, nHiVal, filters.readerKeyData); 00324 00325 for (uint b = nLoVal; b < nHiVal; b++) { 00326 filters.filteringBitmap.set(b); 00327 } 00328 } 00329 }
void LcsColumnReader::init | ( | LcsClusterReader * | pScanInit, | |
uint | colOrdInit | |||
) | [inline] |
Initializes a scan of column "colOrdInit".
pScanInit | cluster reader containing column to be initialized | |
colOrdInit | column number within cluster; 0-based |
Definition at line 165 of file LcsColumnReader.h.
00166 { 00167 pScan = pScanInit; 00168 colOrd = colOrdInit; 00169 filters.hasResidualFilters = false; 00170 filters.filterDataInitialized = false; 00171 allProj.push_back(0); 00172 }
void LcsColumnReader::sync | ( | ) |
Synchronizes batches for each column when the scan moves to a new range.
Definition at line 29 of file LcsColumnReader.cpp.
References batchIsCompressed(), batchIsFixed(), bitVecPtr(), bitVecWidth(), buildContainsMap(), calcWidth(), colOrd, LcsClusterAccessBase::delta, LcsResidualColumnFilters::filteringBitmap, filters, getCompressedValue(), getFixedValue(), getVariableValue(), LcsResidualColumnFilters::hasResidualFilters, iV, LcsBatchDir::nRow, LcsBatchDir::nVal, origin, LcsBatchDir::oVal, pBase, pBatch, pFuncReadBitVec, pGetCurrentValueFunc, LcsClusterReader::pLeaf, LcsClusterReader::pRangeBatches, pScan, pValues, readBitVec0(), readBitVec1(), readBitVec10(), readBitVec12(), readBitVec16(), readBitVec2(), readBitVec3(), readBitVec4(), readBitVec5(), readBitVec6(), readBitVec8(), readBitVec9(), and width.
00030 { 00031 // Get batch using column's offset within cluster 00032 pBatch = &pScan->pRangeBatches[colOrd]; 00033 pValues = pScan->pLeaf + pBatch->oVal; 00034 pBase = pScan->pLeaf - pScan->delta[colOrd]; 00035 00036 filters.filteringBitmap.resize(0); 00037 00038 if (batchIsCompressed()) { 00039 // where the bit vectors start 00040 const PBuffer pBit = pValues + (sizeof(uint16_t) * pBatch->nVal); 00041 // # bits per value 00042 uint nBits = calcWidth(pBatch->nVal); 00043 // calculate bit vector widths 00044 iV = bitVecWidth(nBits, width); 00045 bitVecPtr(pBatch->nRow, iV, width, origin, (PBuffer) pBit); 00046 00047 uint totWidth; 00048 if (iV == 1) { 00049 totWidth = width[0]; 00050 } else if (iV == 2) { 00051 totWidth = width[0] + width[1]; 00052 } else { 00053 totWidth = 0; 00054 } 00055 00056 // hack to do one switch statement based on both width arguments 00057 // The switch value is unique for any of the following combos of 00058 // width 1 and 2: 00059 // (8,-), (8,4), (8,2), (8,1), (4,-), (4,2), (4,1), (2,-), (2,1), 00060 // (1,-) 00061 // 00062 00063 switch (totWidth) { 00064 // single vector 00065 case 16: // width 1 = 16 00066 pFuncReadBitVec = readBitVec16; 00067 break; 00068 00069 case 8: // width 1 = 8 00070 pFuncReadBitVec = readBitVec8; 00071 break; 00072 00073 case 4: // width 1 = 4 00074 pFuncReadBitVec = readBitVec4; 00075 break; 00076 00077 case 2: // width 1 = 2 00078 pFuncReadBitVec = readBitVec2; 00079 break; 00080 00081 case 1: // width 1 = 1 00082 pFuncReadBitVec = readBitVec1; 00083 break; 00084 00085 // dual vector, first vector 8 00086 00087 case 12: // width 1 = 8, width 2 = 4 00088 pFuncReadBitVec = readBitVec12; 00089 break; 00090 00091 case 10: // width 1 = 8, width 2 = 2 00092 pFuncReadBitVec = readBitVec10; 00093 break; 00094 00095 case 9: // width 1 = 8, width 2 = 1 00096 pFuncReadBitVec = readBitVec9; 00097 break; 00098 00099 // dual vector, first vector 4 00100 00101 case 6: // width 1 = 4, width 2 = 2 00102 pFuncReadBitVec = 
readBitVec6; 00103 break; 00104 00105 case 5: // width 1 = 4, width 2 = 1 00106 pFuncReadBitVec = readBitVec5; 00107 break; 00108 00109 // dual vector, first vector is 2 00110 00111 case 3: // width 1 = 2, width 2 = 1 00112 pFuncReadBitVec = readBitVec3; 00113 break; 00114 00115 // no bit vector stored 00116 case 0: 00117 pFuncReadBitVec = readBitVec0; 00118 break; 00119 00120 default: 00121 assert(false); 00122 break; 00123 } 00124 00125 // Set function pointer to get data 00126 pGetCurrentValueFunc = &LcsColumnReader::getCompressedValue; 00127 00128 if (filters.hasResidualFilters) { 00129 /* 00130 * initializes bitmap 00131 */ 00132 buildContainsMap(); 00133 } 00134 00135 } else if (batchIsFixed()) { 00136 // Set function pointer to get data in fixed case 00137 pGetCurrentValueFunc = &LcsColumnReader::getFixedValue; 00138 } else { 00139 // Set function pointer to get data in variable case 00140 pGetCurrentValueFunc = &LcsColumnReader::getVariableValue; 00141 } 00142 }
bool LcsColumnReader::batchIsCompressed | ( | ) | const [inline] |
Returns true if current batch for column is compressed.
Definition at line 183 of file LcsColumnReader.h.
References LCS_COMPRESSED.
Referenced by getCurrentValueCode(), and sync().
00184 { 00185 return pBatch->mode == LCS_COMPRESSED; 00186 }
bool LcsColumnReader::batchIsFixed | ( | ) | const [inline] |
Returns true if current batch for column is fixed.
const PBuffer LcsColumnReader::getCurrentValue | ( | ) | [inline] |
Returns the current value of the column, using the retrieval function selected by sync() for the batch mode.
Definition at line 199 of file LcsColumnReader.h.
00200 { 00201 return (this->*pGetCurrentValueFunc)(); 00202 }
uint16_t LcsColumnReader::getCurrentValueCode | ( | ) | const |
Gets the code (between 0 and getBatchValCount() - 1) of the current value for a compressed batch.
Definition at line 168 of file LcsColumnReader.cpp.
References batchIsCompressed(), LcsClusterReader::getRangePos(), origin, pFuncReadBitVec, and pScan.
Referenced by applyFilters(), and getCompressedValue().
00169 { 00170 assert(batchIsCompressed()); 00171 uint16_t nValCode; 00172 pFuncReadBitVec(&nValCode, origin, pScan->getRangePos()); 00173 return nValCode; 00174 }
uint LcsColumnReader::getBatchValCount | ( | ) | const [inline] |
Returns number of distinct values in batch.
Definition at line 217 of file LcsColumnReader.h.
Referenced by buildContainsMap(), findBounds(), findVal(), and LbmGeneratorExecStream::generateBitmaps().
const PBuffer LcsColumnReader::getBatchBase | ( | ) | const [inline] |
Returns base pointer of batch.
Definition at line 225 of file LcsColumnReader.h.
Referenced by getVariableValue().
00226 { 00227 return pBase; 00228 }
const uint16_t* LcsColumnReader::getBatchOffsets | ( | ) | const [inline] |
Returns table of offsets from base.
Definition at line 233 of file LcsColumnReader.h.
Referenced by getVariableValue().
const PBuffer LcsColumnReader::getBatchValue | ( | uint | iValCode | ) | const [inline] |
Gets iValCode-th value.
iValCode | code corresponding to value to be retrieved from batch |
Definition at line 243 of file LcsColumnReader.h.
Referenced by findVal(), LbmGeneratorExecStream::generateBitmaps(), and getCompressedValue().
00244 { 00245 return (const PBuffer) (getBatchBase() + getBatchOffsets()[iValCode]); 00246 }
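void LcsColumnReader::readCompressedBatch | ( | uint | count, | |
uint16_t * | pValCodes, | |||
uint * | pActCount | |||
) |
Reads up to "count" value-codes into the "pValCodes" table.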
If we reach the end of the batch, "*pActCount", the number of value-codes actually read, is less than "count".
For example, if we are at rid 1000, then "pValCodes[i]" will hold the code for the value of the indexed column at rid 1000 + i. To convert this code to a value, use "getBatchValue(pValCodes[i])".
Note: This method may be used only with compressed batches.
count | how many value codes to read | |
pValCodes | output param for table for "count" value codes | |
pActCount | output param for actual number of value codes returned |
Definition at line 160 of file LcsColumnReader.cpp.
References LcsClusterReader::getRangePos(), LcsClusterReader::getRangeRowsLeft(), iV, min(), origin, pScan, readBitVecs(), and width.
Referenced by LbmGeneratorExecStream::generateBitmaps().
00162 { 00163 *pActCount = std::min(count, pScan->getRangeRowsLeft()); 00164 readBitVecs( 00165 pValCodes, iV, width, origin, pScan->getRangePos(), *pActCount); 00166 }
struct LcsResidualColumnFilters& LcsColumnReader::getFilters | ( | ) | [inline] |
Definition at line 270 of file LcsColumnReader.h.
00271 { 00272 return filters; 00273 }
bool LcsColumnReader::applyFilters | ( | TupleDescriptor & | projDescriptor, | |
TupleData & | outputTupleData | |||
) |
Applies the filters.
projDescriptor | TupleDescriptor for outputTupleData | |
outputTupleData | is the TupleData to compare with |
Definition at line 176 of file LcsColumnReader.cpp.
References TupleDescriptor::compareTuples(), LcsResidualColumnFilters::filterData, LcsResidualColumnFilters::filteringBitmap, filters, getCurrentValueCode(), LcsResidualColumnFilters::inputKeyDesc, LcsResidualColumnFilters::lowerBoundProj, LcsResidualColumnFilters::readerKeyProj, SEARCH_CLOSED_LOWER, SEARCH_CLOSED_UPPER, SEARCH_UNBOUNDED_LOWER, SEARCH_UNBOUNDED_UPPER, and LcsResidualColumnFilters::upperBoundProj.
00178 { 00179 if (!filters.filteringBitmap.empty()) { 00180 /* 00181 * bitmap filtering 00182 */ 00183 return (filters.filteringBitmap.test(getCurrentValueCode())); 00184 } 00185 00186 for (uint k = 0; k < filters.filterData.size(); k++) { 00187 LcsResidualFilter *filter = filters.filterData[k].get(); 00188 00189 if (filter->lowerBoundDirective != SEARCH_UNBOUNDED_LOWER) { 00190 int c = filters.inputKeyDesc.compareTuples( 00191 filter->boundData, filters.lowerBoundProj, 00192 outputTupleData, filters.readerKeyProj); 00193 00194 if (filter->lowerBoundDirective == SEARCH_CLOSED_LOWER) { 00195 if (c > 0) { 00196 continue; 00197 } 00198 } else { 00199 if (c >= 0) { 00200 continue; 00201 } 00202 } 00203 } 00204 00205 if (filter->upperBoundDirective == SEARCH_UNBOUNDED_UPPER) { 00206 return true; 00207 } 00208 00209 int c = filters.inputKeyDesc.compareTuples( 00210 filter->boundData, filters.upperBoundProj, 00211 outputTupleData, filters.readerKeyProj); 00212 00213 if (filter->upperBoundDirective == SEARCH_CLOSED_UPPER) { 00214 if (c >= 0) { 00215 return true; 00216 } 00217 } else { 00218 if (c > 0) { 00219 return true; 00220 } 00221 } 00222 } 00223 00224 return false; 00225 }
LcsClusterReader* LcsColumnReader::pScan [private] |
Parent cluster reader object.
Definition at line 41 of file LcsColumnReader.h.
Referenced by getCurrentValueCode(), getFixedValue(), getVariableValue(), readCompressedBatch(), and sync().
uint LcsColumnReader::colOrd [private] |
Ordinal of column in cluster (0-based).
Definition at line 46 of file LcsColumnReader.h.
Referenced by sync().
PLcsBatchDir LcsColumnReader::pBatch [private] |
Batch corresponding to column.
Definition at line 51 of file LcsColumnReader.h.
Referenced by getFixedValue(), and sync().
PBuffer LcsColumnReader::pValues [private] |
Values of the batch.
Definition at line 56 of file LcsColumnReader.h.
Referenced by getFixedValue(), and sync().
PBuffer LcsColumnReader::pBase [private] |
Base address from which offsets are stored.
Definition at line 61 of file LcsColumnReader.h.
Referenced by sync().
PtrVec LcsColumnReader::origin [private] |
Pointers to the origin of each bit vector.
Definition at line 66 of file LcsColumnReader.h.
Referenced by getCurrentValueCode(), readCompressedBatch(), and sync().
WidthVec LcsColumnReader::width [private] |
Width of bit vectors.
Definition at line 71 of file LcsColumnReader.h.
Referenced by readCompressedBatch(), and sync().
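uint LcsColumnReader::iV [private] |
Number of bit vectors in the current compressed batch (set by sync() from bitVecWidth()).
Referenced by readCompressedBatch(), and sync().
PBitVecFuncPtr LcsColumnReader::pFuncReadBitVec [private] |
Pointer to appropriate bit vector read function.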
Definition at line 80 of file LcsColumnReader.h.
Referenced by getCurrentValueCode(), and sync().
const PBuffer(LcsColumnReader:: * LcsColumnReader::pGetCurrentValueFunc)() [private] |
Pointer to the member function that retrieves the current value of the column; sync() points it at the compressed, fixed, or variable accessor to match the batch mode.
Referenced by sync().
LcsResidualColumnFilters LcsColumnReader::filters [private] |
Filters associated with this column.
Definition at line 91 of file LcsColumnReader.h.
Referenced by applyFilters(), buildContainsMap(), findBounds(), findVal(), and sync().
TupleProjection LcsColumnReader::allProj [private] |
Projection for readerKeyData.
Definition at line 96 of file LcsColumnReader.h.
Referenced by findVal().