LcsHash.h

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/lucidera/colstore/LcsHash.h#16 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 LucidEra, Inc.
00005 // Copyright (C) 2005-2009 The Eigenbase Project
00006 //
00007 // This program is free software; you can redistribute it and/or modify it
00008 // under the terms of the GNU General Public License as published by the Free
00009 // Software Foundation; either version 2 of the License, or (at your option)
00010 // any later version approved by The Eigenbase Project.
00011 //
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 //
00017 // You should have received a copy of the GNU General Public License
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 */
00021 
00022 #ifndef Fennel_LcsHash_Included
00023 #define Fennel_LcsHash_Included
00024 
00025 #include "fennel/tuple/TupleData.h"
00026 #include "fennel/tuple/TupleDataWithBuffer.h"
00027 #include "fennel/tuple/TupleDescriptor.h"
00028 #include "fennel/tuple/UnalignedAttributeAccessor.h"
00029 #include "fennel/lucidera/colstore/LcsClusterNodeWriter.h"
00030 
00031 
00032 FENNEL_BEGIN_NAMESPACE
00033 
00034 /************************************************
00035   classes and defines used only by LcsHash class
00036  ************************************************/
00037 
00043 class FENNEL_LCS_EXPORT LcsHashValOrd
00044 {
00045 private:
00046 
00050     uint16_t    valOrd;
00051 
00052 public:
00053 
00054     LcsHashValOrd() {}
00055     ~LcsHashValOrd() {}
00056 
00057     inline explicit LcsHashValOrd(LcsHashValOrd const &other);
00058 
00062     inline LcsHashValOrd& operator=(uint16_t valOrdInit);
00063 
00069     inline uint16_t getValOrd();
00070 
00076     inline void setValOrd(uint16_t valOrdInit);
00077 
00081     inline bool isValueInBatch();
00082 
00086     inline void setValueInBatch();
00087 
00091     inline void clearValueInBatch();
00092 };
00093 
00094 
00099 struct LcsHashValueNode
00100 {
00104     uint16_t      next;
00105 
00110     LcsHashValOrd valueOrd;
00111 
00115     uint16_t      valueOffset;
00116 
00121     uint16_t      sortedOrd;
00122 };
00123 
00124 
00130 class FENNEL_LCS_EXPORT LcsHashTable
00131 {
00132 private:
00133 
00137     PBuffer           hashBlock;
00138 
00142     uint              hashBlockSize;
00143 
00147     uint              hashTableSize;
00148 
00152     uint              nextValueNode;
00153 
00154 public:
00155 
00167     uint16_t          *entry;
00168 
00172     LcsHashValueNode  *valueNodes;
00173 
00174     inline explicit LcsHashTable();
00175 
00184     void init(PBuffer hashBlockInit, uint hashBlockSizeInit);
00185 
00189     inline void resetHash();
00190 
00195     inline void resetBatch();
00196 
00200     inline uint numHashEntries();
00201 
00206     inline LcsHashValueNode* getNewValueNode();
00207 
00215     inline void insertNewValueNode(uint key,  LcsHashValueNode *newNode);
00216 
00224     inline void undoNewValueNode(uint key);
00225 
00233     inline LcsHashValueNode* getFirstValueNode(uint key);
00234 
00242     inline LcsHashValueNode* getNextValueNode(LcsHashValueNode *pValueNode);
00243 
00252     inline bool isFull(uint leftOvers = 0);
00253 };
00254 
00255 
00259 enum LcsUndoState { NOTHING, NEWENTRY, NEWBATCHVALUE };
00260 
00261 
00265 struct LcsUndoType
00266 {
00270     LcsUndoState     what;
00271 
00275     uint             key;
00276 
00280     uint             origMaxValueSize;
00281 
00285     LcsHashValueNode *vPtr;
00286 
00290     inline LcsUndoType();
00291 
00303     inline void set(
00304         LcsUndoState whatInit,
00305         uint keyInit,
00306         uint origMaxValueSizeInit,
00307         LcsHashValueNode *vPtrInit);
00308 
00312     inline void reset();
00313 };
00314 
00315 
00322 class FENNEL_LCS_EXPORT LcsCompareColKeyUsingOffsetIndex
00323 {
00324 private:
00325 
00330     LcsHashTable            *hashTable;
00331 
00335     SharedLcsClusterNodeWriter    clusterBlockWriter;
00336 
00342     uint                     columnId;
00343 
00347     TupleDataWithBuffer      colTuple1;
00348     TupleDataWithBuffer      colTuple2;
00349 
00353     TupleDescriptor          colTupleDesc;
00354 
00355     UnalignedAttributeAccessor attrAccessor;
00356 
00357 public:
00358 
00372     explicit LcsCompareColKeyUsingOffsetIndex(
00373         SharedLcsClusterNodeWriter clusterBlockWriterInit,
00374         LcsHashTable *hashTableInit,
00375         TupleDescriptor const &colTupleDescInit,
00376         uint columnIdInit,
00377         UnalignedAttributeAccessor const &attrAccessorInit);
00378 
00379     ~LcsCompareColKeyUsingOffsetIndex() {}
00380 
00394     bool lessThan(
00395         const uint16_t colKeyOffsetIndex1,
00396         const uint16_t colKeyOffsetIndex2);
00397 };
00398 
00402 class FENNEL_LCS_EXPORT LcsCompare
00403 {
00404 private:
00405 
00410     SharedLcsCompareColKeyUsingOffsetIndex compareInstance;
00411 
00412 public:
00413 
00420     inline explicit LcsCompare(
00421         SharedLcsCompareColKeyUsingOffsetIndex compareInstanceInit);
00422 
00434     inline bool operator()(
00435         const uint16_t colKeyOffsetIndex1,
00436         const uint16_t colKeyOffsetIndex2);
00437 };
00438 
00439 
00447 class FENNEL_LCS_EXPORT LcsHash
00448 {
00449 private:
00450 
00454     uint                  columnId;
00455 
00461     LcsHashTable          hash;
00462 
00466     SharedLcsClusterNodeWriter clusterBlockWriter;
00467 
00471     TupleDescriptor       colTupleDesc;
00472 
00476     TupleDataWithBuffer   colTuple;
00477 
00481     UnalignedAttributeAccessor attrAccessor;
00482 
00486     TupleDataWithBuffer   searchTuple;
00487 
00491     boost::scoped_array<FixedBuffer> colTupleBuffer;
00492 
00497     uint16_t              valCnt;
00498 
00503     uint                  maxValueSize;
00504 
00508     LcsUndoType           undo;
00509 
00513     uint8_t              *magicTable;
00514 
00518     uint                  numChecks;
00519 
00523     uint                  numMatches;
00524 
00528     SharedLcsCompareColKeyUsingOffsetIndex compareInst;
00529 
00538     uint computeKey(PBuffer dataWithLen);
00539 
00553     bool search(
00554         uint key, PBuffer dataWithLen,
00555         LcsHashValOrd *valOrd, LcsHashValueNode **v);
00556 
00557 public:
00558     explicit LcsHash();
00559     ~LcsHash()
00560     {
00561     }
00562 
00576     void init(
00577         PBuffer hashBlockInit,
00578         SharedLcsClusterNodeWriter clusterBlockWriterInit,
00579         TupleDescriptor const &colTupleDescInit,
00580         uint columnIdInit,
00581         uint blockSizeInit);
00582 
00593     void insert(
00594         TupleDatum &colTupleDatum,
00595         LcsHashValOrd *valOrd,
00596         bool *undoInsert);
00597 
00608     void insert(
00609         PBuffer dataWithLen,
00610         LcsHashValOrd *valOrd,
00611         bool *undoInsert);
00612 
00620     void undoInsert(TupleDatum &colTupleDatum);
00621 
00627     void undoInsert(PBuffer dataWithLen);
00628 
00629 
00638     void prepareFixedOrVariableBatch(uint8_t *rowArray, uint numRows);
00639 
00654     void prepareCompressedBatch(
00655         uint8_t *rowArray,
00656         uint     numRows,
00657         uint16_t *numVals,
00658         uint16_t *offsetIndexVector);
00659 
00664     void clearFixedEntries();
00665 
00673     void startNewBatch(uint leftOvers);
00674 
00684     void restore(uint numVals, uint16_t lastValOff);
00685 
00692     inline uint getMaxValueSize();
00693 
00703     inline bool isHashFull(uint leftOvers = 0);
00704 };
00705 
00706 
00707 /*******************************************************
00708   Definitions of inline methods for class LcsHashValOrd
00709  *******************************************************/
00710 
00711 inline LcsHashValOrd::LcsHashValOrd(LcsHashValOrd const &other)
00712 {
00713     valOrd = other.valOrd;
00714 }
00715 
00716 inline LcsHashValOrd& LcsHashValOrd::operator=(uint16_t valOrdInit)
00717 {
00718     setValOrd(valOrdInit);
00719     return *this;
00720 }
00721 
00722 inline uint16_t LcsHashValOrd::getValOrd()
00723 {
00724     return (uint16_t)(valOrd & ~(1<<15));
00725 }
00726 
00727 inline void LcsHashValOrd::setValOrd(uint16_t valOrdInit)
00728 {
00729     valOrd = (valOrdInit & ~(1<<15));
00730 }
00731 
00732 inline bool LcsHashValOrd::isValueInBatch()
00733 {
00734     return (valOrd & (1<<15));
00735 }
00736 
00737 inline void LcsHashValOrd::setValueInBatch()
00738 {
00739     valOrd |= 1<<15;
00740 }
00741 
00742 inline void LcsHashValOrd::clearValueInBatch()
00743 {
00744     valOrd &= ~(1<<15);
00745 }
00746 
00747 
00748 /*****************************************************
00749   Definitions of inline methods for class LcsUndoType
00750  *****************************************************/
00751 
00752 inline LcsUndoType::LcsUndoType()
00753 {
00754     reset();
00755 }
00756 
00757 inline void LcsUndoType::set(
00758     LcsUndoState whatInit,
00759     uint keyInit,
00760     uint origMaxValueSizeInit,
00761     LcsHashValueNode *vPtrInit)
00762 {
00763     what = whatInit;
00764     key = keyInit;
00765     origMaxValueSize = origMaxValueSizeInit;;
00766     vPtr = vPtrInit;
00767 }
00768 
00769 inline void LcsUndoType::reset()
00770 {
00771     what = NOTHING;
00772     key = 0;
00773     origMaxValueSize = 0;
00774     vPtr = 0;
00775 }
00776 
00777 
00778 /******************************************************
00779   Definitions of inline methods for class LcsHashTable
00780  ******************************************************/
00781 
00782 inline LcsHashTable::LcsHashTable()
00783 {
00784     hashBlock = NULL;
00785     hashTableSize = 0;
00786     nextValueNode = 0;
00787 }
00788 
00789 inline void LcsHashTable::resetHash()
00790 {
00791     memset(hashBlock, 0, hashBlockSize);
00792     nextValueNode = 0;
00793 }
00794 
00795 inline void LcsHashTable::resetBatch()
00796 {
00797     for (int i = 0; i < nextValueNode; i++) {
00798         (&(valueNodes[i].valueOrd))->clearValueInBatch();
00799     }
00800 }
00801 
00802 inline uint LcsHashTable::numHashEntries()
00803 {
00804     return hashTableSize;
00805 }
00806 
00807 inline LcsHashValueNode* LcsHashTable::getNewValueNode()
00808 {
00809     return &(valueNodes[nextValueNode]);
00810 }
00811 
00812 inline void LcsHashTable::insertNewValueNode(
00813     uint key,
00814     LcsHashValueNode *newNode)
00815 {
00816     newNode->next = (uint16_t)entry[key];
00817 
00818     /*
00819       Insert at the beginning.
00820     */
00821     entry[key] = (uint16_t)((uint8_t*)newNode - hashBlock);
00822 
00823     /*
00824       Bump up the value for the next node to give out.
00825     */
00826     nextValueNode++;
00827 }
00828 
00829 inline void LcsHashTable::undoNewValueNode(uint key)
00830 {
00831     entry[key] = (uint16_t)
00832         ((LcsHashValueNode*)(hashBlock + entry[key]))->next;
00833     nextValueNode--;
00834 }
00835 
00836 inline LcsHashValueNode* LcsHashTable::getFirstValueNode(uint key)
00837 {
00838     uint16_t offset = entry[key];
00839 
00840     if (offset) {
00841         return (LcsHashValueNode*) (hashBlock + offset);
00842     } else {
00843         return NULL;
00844     }
00845 }
00846 
00847 inline LcsHashValueNode* LcsHashTable::getNextValueNode(
00848     LcsHashValueNode *pValueNode)
00849 {
00850     uint16_t offset = pValueNode->next;
00851 
00852     if (offset) {
00853         return (LcsHashValueNode*) (hashBlock + offset);
00854     } else {
00855         return NULL;
00856     }
00857 }
00858 
00859 inline bool LcsHashTable::isFull(uint leftOvers)
00860 {
00861     /*
00862       need to leave one spot for sorting and leftsOvers that
00863       will be added
00864     */
00865     return ((PBuffer)(&(valueNodes[nextValueNode])
00866             + (leftOvers + 1) * sizeof(LcsHashValueNode))
00867         >= (PBuffer) (hashBlock + hashBlockSize));
00868 }
00869 
00870 
00871 /****************************************************
00872   Definitions of inline methods for class LcsCompare
00873  ****************************************************/
00874 
00875 inline LcsCompare::LcsCompare(
00876     SharedLcsCompareColKeyUsingOffsetIndex compareInstanceInit)
00877 {
00878     compareInstance = compareInstanceInit;
00879 }
00880 
00881 inline bool LcsCompare::operator()(
00882     const uint16_t colKeyOffsetIndex1,
00883     const uint16_t colKeyOffsetIndex2)
00884 {
00885     return compareInstance->lessThan(colKeyOffsetIndex1, colKeyOffsetIndex2);
00886 }
00887 
00888 
00889 /*************************************************
00890   Definitions of inline methods for class LcsHash
00891  *************************************************/
00892 
00893 inline uint LcsHash::getMaxValueSize()
00894 {
00895     return maxValueSize;
00896 }
00897 
00898 inline bool LcsHash::isHashFull(uint leftOvers)
00899 {
00900     return hash.isFull(leftOvers);
00901 }
00902 
00903 FENNEL_END_NAMESPACE
00904 
00905 #endif
00906 
00907 // End LcsHash.h

Generated on Mon Jun 22 04:00:19 2009 for Fennel by  doxygen 1.5.1