00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef Fennel_LcsHash_Included
00023 #define Fennel_LcsHash_Included
00024
00025 #include "fennel/tuple/TupleData.h"
00026 #include "fennel/tuple/TupleDataWithBuffer.h"
00027 #include "fennel/tuple/TupleDescriptor.h"
00028 #include "fennel/tuple/UnalignedAttributeAccessor.h"
00029 #include "fennel/lucidera/colstore/LcsClusterNodeWriter.h"
00030
00031
00032 FENNEL_BEGIN_NAMESPACE
00033
00034
00035
00036
00037
00043 class FENNEL_LCS_EXPORT LcsHashValOrd
00044 {
00045 private:
00046
00050 uint16_t valOrd;
00051
00052 public:
00053
00054 LcsHashValOrd() {}
00055 ~LcsHashValOrd() {}
00056
00057 inline explicit LcsHashValOrd(LcsHashValOrd const &other);
00058
00062 inline LcsHashValOrd& operator=(uint16_t valOrdInit);
00063
00069 inline uint16_t getValOrd();
00070
00076 inline void setValOrd(uint16_t valOrdInit);
00077
00081 inline bool isValueInBatch();
00082
00086 inline void setValueInBatch();
00087
00091 inline void clearValueInBatch();
00092 };
00093
00094
00099 struct LcsHashValueNode
00100 {
00104 uint16_t next;
00105
00110 LcsHashValOrd valueOrd;
00111
00115 uint16_t valueOffset;
00116
00121 uint16_t sortedOrd;
00122 };
00123
00124
00130 class FENNEL_LCS_EXPORT LcsHashTable
00131 {
00132 private:
00133
00137 PBuffer hashBlock;
00138
00142 uint hashBlockSize;
00143
00147 uint hashTableSize;
00148
00152 uint nextValueNode;
00153
00154 public:
00155
00167 uint16_t *entry;
00168
00172 LcsHashValueNode *valueNodes;
00173
00174 inline explicit LcsHashTable();
00175
00184 void init(PBuffer hashBlockInit, uint hashBlockSizeInit);
00185
00189 inline void resetHash();
00190
00195 inline void resetBatch();
00196
00200 inline uint numHashEntries();
00201
00206 inline LcsHashValueNode* getNewValueNode();
00207
00215 inline void insertNewValueNode(uint key, LcsHashValueNode *newNode);
00216
00224 inline void undoNewValueNode(uint key);
00225
00233 inline LcsHashValueNode* getFirstValueNode(uint key);
00234
00242 inline LcsHashValueNode* getNextValueNode(LcsHashValueNode *pValueNode);
00243
00252 inline bool isFull(uint leftOvers = 0);
00253 };
00254
00255
00259 enum LcsUndoState { NOTHING, NEWENTRY, NEWBATCHVALUE };
00260
00261
00265 struct LcsUndoType
00266 {
00270 LcsUndoState what;
00271
00275 uint key;
00276
00280 uint origMaxValueSize;
00281
00285 LcsHashValueNode *vPtr;
00286
00290 inline LcsUndoType();
00291
00303 inline void set(
00304 LcsUndoState whatInit,
00305 uint keyInit,
00306 uint origMaxValueSizeInit,
00307 LcsHashValueNode *vPtrInit);
00308
00312 inline void reset();
00313 };
00314
00315
00322 class FENNEL_LCS_EXPORT LcsCompareColKeyUsingOffsetIndex
00323 {
00324 private:
00325
00330 LcsHashTable *hashTable;
00331
00335 SharedLcsClusterNodeWriter clusterBlockWriter;
00336
00342 uint columnId;
00343
00347 TupleDataWithBuffer colTuple1;
00348 TupleDataWithBuffer colTuple2;
00349
00353 TupleDescriptor colTupleDesc;
00354
00355 UnalignedAttributeAccessor attrAccessor;
00356
00357 public:
00358
00372 explicit LcsCompareColKeyUsingOffsetIndex(
00373 SharedLcsClusterNodeWriter clusterBlockWriterInit,
00374 LcsHashTable *hashTableInit,
00375 TupleDescriptor const &colTupleDescInit,
00376 uint columnIdInit,
00377 UnalignedAttributeAccessor const &attrAccessorInit);
00378
00379 ~LcsCompareColKeyUsingOffsetIndex() {}
00380
00394 bool lessThan(
00395 const uint16_t colKeyOffsetIndex1,
00396 const uint16_t colKeyOffsetIndex2);
00397 };
00398
00402 class FENNEL_LCS_EXPORT LcsCompare
00403 {
00404 private:
00405
00410 SharedLcsCompareColKeyUsingOffsetIndex compareInstance;
00411
00412 public:
00413
00420 inline explicit LcsCompare(
00421 SharedLcsCompareColKeyUsingOffsetIndex compareInstanceInit);
00422
00434 inline bool operator()(
00435 const uint16_t colKeyOffsetIndex1,
00436 const uint16_t colKeyOffsetIndex2);
00437 };
00438
00439
00447 class FENNEL_LCS_EXPORT LcsHash
00448 {
00449 private:
00450
00454 uint columnId;
00455
00461 LcsHashTable hash;
00462
00466 SharedLcsClusterNodeWriter clusterBlockWriter;
00467
00471 TupleDescriptor colTupleDesc;
00472
00476 TupleDataWithBuffer colTuple;
00477
00481 UnalignedAttributeAccessor attrAccessor;
00482
00486 TupleDataWithBuffer searchTuple;
00487
00491 boost::scoped_array<FixedBuffer> colTupleBuffer;
00492
00497 uint16_t valCnt;
00498
00503 uint maxValueSize;
00504
00508 LcsUndoType undo;
00509
00513 uint8_t *magicTable;
00514
00518 uint numChecks;
00519
00523 uint numMatches;
00524
00528 SharedLcsCompareColKeyUsingOffsetIndex compareInst;
00529
00538 uint computeKey(PBuffer dataWithLen);
00539
00553 bool search(
00554 uint key, PBuffer dataWithLen,
00555 LcsHashValOrd *valOrd, LcsHashValueNode **v);
00556
00557 public:
00558 explicit LcsHash();
00559 ~LcsHash()
00560 {
00561 }
00562
00576 void init(
00577 PBuffer hashBlockInit,
00578 SharedLcsClusterNodeWriter clusterBlockWriterInit,
00579 TupleDescriptor const &colTupleDescInit,
00580 uint columnIdInit,
00581 uint blockSizeInit);
00582
00593 void insert(
00594 TupleDatum &colTupleDatum,
00595 LcsHashValOrd *valOrd,
00596 bool *undoInsert);
00597
00608 void insert(
00609 PBuffer dataWithLen,
00610 LcsHashValOrd *valOrd,
00611 bool *undoInsert);
00612
00620 void undoInsert(TupleDatum &colTupleDatum);
00621
00627 void undoInsert(PBuffer dataWithLen);
00628
00629
00638 void prepareFixedOrVariableBatch(uint8_t *rowArray, uint numRows);
00639
00654 void prepareCompressedBatch(
00655 uint8_t *rowArray,
00656 uint numRows,
00657 uint16_t *numVals,
00658 uint16_t *offsetIndexVector);
00659
00664 void clearFixedEntries();
00665
00673 void startNewBatch(uint leftOvers);
00674
00684 void restore(uint numVals, uint16_t lastValOff);
00685
00692 inline uint getMaxValueSize();
00693
00703 inline bool isHashFull(uint leftOvers = 0);
00704 };
00705
00706
00707
00708
00709
00710
00711 inline LcsHashValOrd::LcsHashValOrd(LcsHashValOrd const &other)
00712 {
00713 valOrd = other.valOrd;
00714 }
00715
00716 inline LcsHashValOrd& LcsHashValOrd::operator=(uint16_t valOrdInit)
00717 {
00718 setValOrd(valOrdInit);
00719 return *this;
00720 }
00721
00722 inline uint16_t LcsHashValOrd::getValOrd()
00723 {
00724 return (uint16_t)(valOrd & ~(1<<15));
00725 }
00726
00727 inline void LcsHashValOrd::setValOrd(uint16_t valOrdInit)
00728 {
00729 valOrd = (valOrdInit & ~(1<<15));
00730 }
00731
00732 inline bool LcsHashValOrd::isValueInBatch()
00733 {
00734 return (valOrd & (1<<15));
00735 }
00736
00737 inline void LcsHashValOrd::setValueInBatch()
00738 {
00739 valOrd |= 1<<15;
00740 }
00741
00742 inline void LcsHashValOrd::clearValueInBatch()
00743 {
00744 valOrd &= ~(1<<15);
00745 }
00746
00747
00748
00749
00750
00751
00752 inline LcsUndoType::LcsUndoType()
00753 {
00754 reset();
00755 }
00756
00757 inline void LcsUndoType::set(
00758 LcsUndoState whatInit,
00759 uint keyInit,
00760 uint origMaxValueSizeInit,
00761 LcsHashValueNode *vPtrInit)
00762 {
00763 what = whatInit;
00764 key = keyInit;
00765 origMaxValueSize = origMaxValueSizeInit;;
00766 vPtr = vPtrInit;
00767 }
00768
00769 inline void LcsUndoType::reset()
00770 {
00771 what = NOTHING;
00772 key = 0;
00773 origMaxValueSize = 0;
00774 vPtr = 0;
00775 }
00776
00777
00778
00779
00780
00781
00782 inline LcsHashTable::LcsHashTable()
00783 {
00784 hashBlock = NULL;
00785 hashTableSize = 0;
00786 nextValueNode = 0;
00787 }
00788
00789 inline void LcsHashTable::resetHash()
00790 {
00791 memset(hashBlock, 0, hashBlockSize);
00792 nextValueNode = 0;
00793 }
00794
00795 inline void LcsHashTable::resetBatch()
00796 {
00797 for (int i = 0; i < nextValueNode; i++) {
00798 (&(valueNodes[i].valueOrd))->clearValueInBatch();
00799 }
00800 }
00801
00802 inline uint LcsHashTable::numHashEntries()
00803 {
00804 return hashTableSize;
00805 }
00806
00807 inline LcsHashValueNode* LcsHashTable::getNewValueNode()
00808 {
00809 return &(valueNodes[nextValueNode]);
00810 }
00811
00812 inline void LcsHashTable::insertNewValueNode(
00813 uint key,
00814 LcsHashValueNode *newNode)
00815 {
00816 newNode->next = (uint16_t)entry[key];
00817
00818
00819
00820
00821 entry[key] = (uint16_t)((uint8_t*)newNode - hashBlock);
00822
00823
00824
00825
00826 nextValueNode++;
00827 }
00828
00829 inline void LcsHashTable::undoNewValueNode(uint key)
00830 {
00831 entry[key] = (uint16_t)
00832 ((LcsHashValueNode*)(hashBlock + entry[key]))->next;
00833 nextValueNode--;
00834 }
00835
00836 inline LcsHashValueNode* LcsHashTable::getFirstValueNode(uint key)
00837 {
00838 uint16_t offset = entry[key];
00839
00840 if (offset) {
00841 return (LcsHashValueNode*) (hashBlock + offset);
00842 } else {
00843 return NULL;
00844 }
00845 }
00846
00847 inline LcsHashValueNode* LcsHashTable::getNextValueNode(
00848 LcsHashValueNode *pValueNode)
00849 {
00850 uint16_t offset = pValueNode->next;
00851
00852 if (offset) {
00853 return (LcsHashValueNode*) (hashBlock + offset);
00854 } else {
00855 return NULL;
00856 }
00857 }
00858
00859 inline bool LcsHashTable::isFull(uint leftOvers)
00860 {
00861
00862
00863
00864
00865 return ((PBuffer)(&(valueNodes[nextValueNode])
00866 + (leftOvers + 1) * sizeof(LcsHashValueNode))
00867 >= (PBuffer) (hashBlock + hashBlockSize));
00868 }
00869
00870
00871
00872
00873
00874
00875 inline LcsCompare::LcsCompare(
00876 SharedLcsCompareColKeyUsingOffsetIndex compareInstanceInit)
00877 {
00878 compareInstance = compareInstanceInit;
00879 }
00880
00881 inline bool LcsCompare::operator()(
00882 const uint16_t colKeyOffsetIndex1,
00883 const uint16_t colKeyOffsetIndex2)
00884 {
00885 return compareInstance->lessThan(colKeyOffsetIndex1, colKeyOffsetIndex2);
00886 }
00887
00888
00889
00890
00891
00892
00893 inline uint LcsHash::getMaxValueSize()
00894 {
00895 return maxValueSize;
00896 }
00897
00898 inline bool LcsHash::isHashFull(uint leftOvers)
00899 {
00900 return hash.isFull(leftOvers);
00901 }
00902
00903 FENNEL_END_NAMESPACE
00904
00905 #endif
00906
00907