00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef Fennel_LhxHashTable_Included
00024 #define Fennel_LhxHashTable_Included
00025
00026 #include "fennel/hashexe/LhxHashGenerator.h"
00027 #include "fennel/tuple/TupleData.h"
00028 #include "fennel/tuple/TupleDataWithBuffer.h"
00029 #include "fennel/tuple/TupleDescriptor.h"
00030 #include "fennel/tuple/TupleAccessor.h"
00031 #include "fennel/tuple/TupleProjectionAccessor.h"
00032 #include "fennel/segment/SegPageLock.h"
00033 #include "fennel/hashexe/LhxHashBase.h"
00034 #include "fennel/exec/AggComputer.h"
00035 #include "fennel/common/FennelExcn.h"
00036 #include "fennel/tuple/TupleOverflowExcn.h"
00037
00038 #include "math.h"
00039
00040 using namespace std;
00041
00042 FENNEL_BEGIN_NAMESPACE
00043
00044
00045
00046
00047
00055 class FENNEL_HASHEXE_EXPORT LhxHashNodeAccessor
00056 {
00057
00058
00059
00060
00061
00062 uint nextNodeOffset;
00063
00064
00065
00066
00067 uint nodeBufferOffset;
00068
00069
00070
00071
00072 PBuffer nodePtr;
00073
00074 public:
00075
00076
00077
00078
00079
00080
00081
00082
00083 LhxHashNodeAccessor();
00084
00091 LhxHashNodeAccessor(uint nextNodeOffsetInit);
00092
00096 PBuffer getCurrent();
00097
00104 void setCurrent(PBuffer nodePtrInit);
00105
00109 void reset();
00110
00114 PBuffer getBuffer();
00115
00119 PBuffer getNext();
00120
00124 PBuffer getNextLocation();
00125
00131 void setNext(PBuffer nextNode);
00132
00139 void setNext(PBuffer inputNode, PBuffer nextNode);
00140
00144 uint getNextFieldSize();
00145
00146
00147
00152 uint getBufferOffset();
00153 };
00154
00155
00156
00157
00158 class FENNEL_HASHEXE_EXPORT LhxHashDataAccessor
00159 : public LhxHashNodeAccessor
00160 {
00161
00162
00163
00164 TupleDescriptor dataDescriptor;
00165
00166
00167
00168
00169 TupleData dataTuple;
00170
00171
00172
00173
00174 TupleAccessor dataAccessor;
00175
00176 public:
00177 LhxHashDataAccessor() : LhxHashNodeAccessor() {}
00178
00184 void init(TupleDescriptor const &inputDataDesc);
00185
00193 void setCurrent(PBuffer nodePtrInit, bool valid);
00194
00200 inline uint getAvgStorageSize();
00201
00202
00203
00204
00205
00206
00212 inline uint getStorageSize(TupleData const &inputTuple);
00213
00219 inline uint getDiskStorageSize(TupleData const &inputTuple);
00220
00230 inline void checkStorageSize(
00231 TupleData const &inputTuple,
00232 uint maxBufferSize);
00233
00239 inline void pack(TupleData const &inputTuple);
00240
00248 void unpack(TupleData &outputTuple, TupleProjection &destProj);
00249
00253 string toString();
00254 };
00255
00256 class FENNEL_HASHEXE_EXPORT LhxHashKeyAccessor
00257 : public LhxHashNodeAccessor
00258 {
00259
00260
00261
00262 uint firstDataOffset;
00263
00264
00265
00266
00267
00268 uint isMatchedOffset;
00269
00270
00271
00272
00273 uint nextSlotOffset;
00274
00275
00276
00277
00278 TupleDescriptor keyDescriptor;
00279
00280
00281
00282
00283 TupleData keyTuple;
00284
00285
00286
00287
00288 TupleAccessor keyAccessor;
00289
00290
00291
00292
00293 TupleProjection keyColsProj;
00294
00295 TupleDescriptor keyColsDesc;
00296
00297
00298
00299
00300 TupleProjection aggsProj;
00301
00302
00303
00304
00305 TupleData inputKey;
00306
00307
00308
00309
00310 TupleData currentKey;
00311
00312
00313
00314
00315 LhxHashDataAccessor firstData;
00316
00317 public:
00318 LhxHashKeyAccessor();
00319
00327 void init(
00328 TupleDescriptor const &keyDescInit,
00329 TupleProjection const &keyColsProjInit,
00330 TupleProjection const &aggsProjInit);
00331
00339 void setCurrent(PBuffer nodePtrInit, bool valid);
00340
00346 inline PBuffer getFirstData();
00347
00353 inline void setFirstData(PBuffer inputFirstData);
00354
00360 inline PBuffer *getNextSlot();
00361
00367 inline void setNextSlot(PBuffer *nextSlot);
00368
00374 inline bool isMatched();
00375
00381 inline void setMatched(bool matched);
00382
00388 void addData(PBuffer inputData);
00389
00395 inline uint getAvgStorageSize();
00396
00402 inline uint getStorageSize(TupleData const &inputTuple);
00403
00409 inline uint getDiskStorageSize(TupleData const &inputTuple);
00410
00420 inline void checkStorageSize(
00421 TupleData const &inputTuple,
00422 uint maxBufferSize);
00423
00430 inline void pack(TupleData const &inputTuple);
00431
00439 void unpack(TupleData &outputTuple, TupleProjection &destProj);
00440
00441
00442
00443
00444
00445
00446
00447 bool matches(
00448 TupleData const &inputTuple,
00449 TupleProjection const &inputKeyProj);
00450
00454 string toString();
00455 };
00456
00457 class FENNEL_HASHEXE_EXPORT LhxHashBlockAccessor
00458 : public LhxHashNodeAccessor
00459 {
00463 uint blockUsableSize;
00464
00468 uint numSlotsPerBlock;
00469
00473 PBuffer freePtr;
00474
00478 PBuffer endPtr;
00479
00480 public:
00481 LhxHashBlockAccessor() : LhxHashNodeAccessor()
00482 {
00483 }
00484
00490 void init(uint usablePageSize);
00491
00500 void setCurrent(PBuffer blockPtrInit, bool valid, bool clearContent);
00501
00505 void reset();
00506
00510 uint getUsableSize()
00511 {
00512 return blockUsableSize;
00513 }
00514
00518 uint getSlotsPerBlock()
00519 {
00520 return numSlotsPerBlock;
00521 }
00522
00530 PBuffer allocBuffer(uint bufSize);
00531
00532
00533
00534
00535
00536
00537
00538
00539 void allocSlots(uint slotCount = 1);
00540
00549 PBuffer *getSlot(uint slotNum);
00550 };
00551
00552 class FENNEL_HASHEXE_EXPORT LhxHashTable
00553 {
00554
00555
00556
00557
00561 uint numSlots;
00562
00569 std::vector<PBuffer> slotBlocks;
00570
00571 PBuffer *firstSlot;
00572 PBuffer *lastSlot;
00573
00577 SegmentAccessor scratchAccessor;
00578
00582 uint maxBlockCount;
00583
00584
00585
00586
00587
00588
00593 PBuffer firstBlock;
00594
00595 PBuffer currentBlock;
00596
00600 LhxHashBlockAccessor blockAccessor;
00601
00606 LhxHashBlockAccessor nodeBlockAccessor;
00607
00611 SegPageLock bufferLock;
00612
00616 uint currentBlockCount;
00617
00621 bool filterNull;
00622 TupleProjection filterNullKeyProj;
00623
00627 bool removeDuplicate;
00628
00629
00630
00635 uint partitionLevel;
00636 LhxHashGenerator hashGen;
00637 LhxHashGenerator hashGenSub;
00638
00644 TupleProjection keyColsAndAggsProj;
00645 TupleProjection keyColsProj;
00646 TupleProjection aggsProj;
00647 TupleProjection dataProj;
00648 vector<LhxHashTrim> isKeyColVarChar;
00649
00653 LhxHashKeyAccessor hashKeyAccessor;
00654 LhxHashDataAccessor hashDataAccessor;
00655
00659 uint maxBufferSize;
00660
00666 bool isGroupBy;
00667
00671 bool hasAggregates;
00672
00676 AggComputerList *aggComputers;
00677 TupleData aggWorkingTuple;
00678
00679 TupleDataWithBuffer aggResultTuple;
00680
00686 PBuffer allocBlock();
00687
00695 PBuffer allocBuffer(uint bufSize);
00696
00703 string printSlot(uint slotNum);
00704
00712 bool addKeyData(TupleData const &inputTuple);
00713
00722 bool addData(PBuffer keyNode, TupleData const &inputTuple);
00723
00730 bool aggData(PBuffer destKeyLoc, TupleData const &inputTuple);
00731
00740 static inline uint slotsNeeded(RecordNum cndKeys);
00741
00752 PBuffer findKeyLocation(
00753 TupleData const &inputTuple,
00754 TupleProjection const &inputKeyProj,
00755 bool isProbing,
00756 bool removeDuplicateProbe);
00757
00758 TupleData tmpKeyTuple;
00759 TupleData tmpDataTuple;
00760
00761 public:
00762
00763
00764
00765
00766
00767 static const uint LhxHashTableMinPages = 2;
00768
00778 void init(
00779 uint partitionLevelInit,
00780 LhxHashInfo const &hashInfo,
00781 AggComputerList *aggList,
00782 uint buildInputIndex);
00783
00791 void init(
00792 uint partitionLevelInit,
00793 LhxHashInfo const &hashInfo,
00794 uint buildInputIndex);
00795
00796
00797
00798
00799
00800
00801
00810 bool allocateResources(bool reuse = false);
00811
00818 void releaseResources(bool reuse = false);
00819
00831 void calculateSize(
00832 LhxHashInfo const &hashInfo,
00833 uint inputIndex,
00834 BlockNum &numBlocks);
00835
00845 void calculateNumSlots(
00846 RecordNum cndKeys,
00847 uint usablePageSize,
00848 BlockNum numBlocks);
00849
00859 PBuffer findKey(
00860 TupleData const &inputTuple,
00861 TupleProjection const &inputKeyProj,
00862 bool removeDuplicateProbe);
00863
00869 bool addTuple(TupleData const &inputTuple);
00870
00878 PBuffer *getSlot(uint slotNum);
00879
00883 inline uint getNumSlots() const;
00884
00888 inline PBuffer *getFirstSlot() const;
00889
00893 inline PBuffer *getNextSlot(PBuffer *curSlot);
00894
00898 inline bool isHashGroupBy() const;
00899
00905 string toString();
00906 };
00907
00908 class FENNEL_HASHEXE_EXPORT LhxHashTableReader
00909 {
00913 LhxHashTable *hashTable;
00914
00915
00922 bool isGroupBy;
00923
00927 PBuffer *curSlot;
00928 PBuffer curKey;
00929 PBuffer curData;
00930
00931
00932
00937 PBuffer boundKey;
00938
00943 bool returnUnMatched;
00944
00948 bool isPositioned;
00949
00953 LhxHashKeyAccessor hashKeyAccessor;
00954 LhxHashDataAccessor hashDataAccessor;
00955
00956
00957
00963 TupleProjection keyColsAndAggsProj;
00964 TupleProjection dataProj;
00965
00972 bool advanceSlot();
00973
00980 bool advanceKey();
00981
00988 bool advanceData();
00989
00993 void produceTuple(TupleData &outputTuple);
00994
00998 inline void bind(PBuffer key);
00999
01000 public:
01008 void init(
01009 LhxHashTable *hashTableInit,
01010 LhxHashInfo const &hashInfo,
01011 uint buildInputIndex);
01012
01019 inline void bindKey(PBuffer key);
01020
01025 inline void bindUnMatched();
01026
01034 bool getNext(TupleData &outputTuple);
01035
01036 inline LhxHashTable *getHashTable();
01037 };
01038
01039 inline LhxHashNodeAccessor::LhxHashNodeAccessor()
01040 {
01041 nextNodeOffset = 0;
01042 nodeBufferOffset = nextNodeOffset + getNextFieldSize();
01043 }
01044
01045 inline LhxHashNodeAccessor::LhxHashNodeAccessor(uint nextNodeOffsetInit)
01046 {
01047 nextNodeOffset = nextNodeOffsetInit;
01048 nodeBufferOffset = nextNodeOffset + getNextFieldSize();
01049 }
01050
01051 inline PBuffer LhxHashNodeAccessor::getCurrent()
01052 {
01053 return nodePtr;
01054 }
01055
01056 inline PBuffer LhxHashNodeAccessor::getBuffer()
01057 {
01058 return (nodePtr + nodeBufferOffset);
01059 }
01060
01061 inline void LhxHashNodeAccessor::setCurrent(PBuffer nodePtrInit)
01062 {
01063 nodePtr = nodePtrInit;
01064 }
01065
01066 inline void LhxHashNodeAccessor::reset()
01067 {
01068 setCurrent(NULL);
01069 }
01070
01071 inline PBuffer LhxHashNodeAccessor::getNext()
01072 {
01073
01074
01075
01076
01077
01078
01079
01080
01081 PBuffer returnPtr;
01082 memcpy((PBuffer)&returnPtr, nodePtr+nextNodeOffset, sizeof(PBuffer));
01083 return returnPtr;
01084 }
01085
01086 inline PBuffer LhxHashNodeAccessor::getNextLocation()
01087 {
01088 return nodePtr + nextNodeOffset;
01089 }
01090
01091 inline void LhxHashNodeAccessor::setNext(PBuffer nextNode)
01092 {
01093 memcpy(nodePtr+nextNodeOffset, (PBuffer)&nextNode, getNextFieldSize());
01094 }
01095
01096 inline void LhxHashNodeAccessor::setNext(PBuffer inputNode, PBuffer nextNode)
01097 {
01098 memcpy(inputNode+nextNodeOffset, (PBuffer)&nextNode, getNextFieldSize());
01099 }
01100
01101 inline uint LhxHashNodeAccessor::getNextFieldSize()
01102 {
01103 return sizeof(PBuffer);
01104 }
01105
01106 inline uint LhxHashNodeAccessor::getBufferOffset()
01107 {
01108 return nodeBufferOffset;
01109 }
01110
01111 inline void LhxHashDataAccessor::setCurrent(PBuffer nodePtrInit, bool valid)
01112 {
01113 LhxHashNodeAccessor::setCurrent(nodePtrInit);
01114 dataAccessor.setCurrentTupleBuf(getBuffer(), valid);
01115 }
01116
01117 inline uint LhxHashDataAccessor::getAvgStorageSize()
01118 {
01119
01120
01121 return
01122 ((dataAccessor.getMaxByteCount() +
01123 dataAccessor.getMinByteCount()) / 2) +
01124 getBufferOffset();
01125 }
01126
01127 inline uint LhxHashDataAccessor::getStorageSize(TupleData const &inputTuple)
01128 {
01129 return dataAccessor.getByteCount(inputTuple) + getBufferOffset();
01130 }
01131
01132 inline uint LhxHashDataAccessor::getDiskStorageSize(
01133 TupleData const &inputTuple)
01134 {
01135 return dataAccessor.getByteCount(inputTuple);
01136 }
01137
01138 inline void LhxHashDataAccessor::checkStorageSize(
01139 TupleData const &inputTuple,
01140 uint maxBufferSize)
01141 {
01142 uint storageSize = getStorageSize(inputTuple);
01143
01144 if (storageSize > maxBufferSize) {
01145 throw TupleOverflowExcn(
01146 dataDescriptor,
01147 inputTuple,
01148 storageSize,
01149 maxBufferSize);
01150 }
01151 }
01152
01153 inline void LhxHashKeyAccessor::pack(TupleData const &inputTuple)
01154 {
01155 PBuffer buf = getBuffer();
01156
01157 assert(buf != NULL && inputTuple.size() == keyAccessor.size());
01158
01159
01160
01161
01162 keyAccessor.marshal(inputTuple, buf);
01163 }
01164
01165 inline void LhxHashDataAccessor::pack(
01166 TupleData const &inputTuple)
01167 {
01168 PBuffer buf = getBuffer();
01169
01170 assert(buf != NULL && inputTuple.size() == dataAccessor.size());
01171
01172
01173
01174
01175 dataAccessor.marshal(inputTuple, buf);
01176 }
01177
01178 inline void LhxHashKeyAccessor::setCurrent(PBuffer nodePtrInit, bool valid)
01179 {
01180 LhxHashNodeAccessor::setCurrent(nodePtrInit);
01181 keyAccessor.setCurrentTupleBuf(getBuffer(), valid);
01182 }
01183
01184 inline uint LhxHashKeyAccessor::getAvgStorageSize()
01185 {
01186 return
01187 ((keyAccessor.getMaxByteCount() + keyAccessor.getMinByteCount()) / 2) +
01188 getBufferOffset();
01189 }
01190
01191 inline uint LhxHashKeyAccessor::getStorageSize(TupleData const &inputTuple)
01192 {
01193 return keyAccessor.getByteCount(inputTuple) + getBufferOffset();
01194 }
01195
01196 inline uint LhxHashKeyAccessor::getDiskStorageSize(TupleData const &inputTuple)
01197 {
01198 return keyAccessor.getByteCount(inputTuple);
01199 }
01200
01201 inline PBuffer LhxHashKeyAccessor::getFirstData()
01202 {
01203 PBuffer returnPtr;
01204 memcpy(
01205 (PBuffer) &returnPtr,
01206 (PBuffer) (getCurrent() + firstDataOffset),
01207 sizeof(PBuffer));
01208 return returnPtr;
01209 }
01210
01211 inline void LhxHashKeyAccessor::setFirstData(PBuffer inputFirstData)
01212 {
01213 memcpy(
01214 (PBuffer)(getCurrent() + firstDataOffset),
01215 (PBuffer)&inputFirstData,
01216 sizeof(PBuffer));
01217 }
01218
01219 inline PBuffer *LhxHashKeyAccessor::getNextSlot()
01220 {
01221 PBuffer *returnPtr;
01222 memcpy(
01223 (PBuffer) &returnPtr,
01224 (PBuffer) (getCurrent() + nextSlotOffset),
01225 sizeof(PBuffer *));
01226 return returnPtr;
01227 }
01228
01229 inline void LhxHashKeyAccessor::setNextSlot(PBuffer *nextSlot)
01230 {
01231 memcpy(
01232 (PBuffer)(getCurrent() + nextSlotOffset),
01233 (PBuffer)&nextSlot,
01234 sizeof(PBuffer*));
01235 }
01236
01237 inline bool LhxHashKeyAccessor::isMatched()
01238 {
01239 return (*(uint8_t *)(getCurrent() + isMatchedOffset) == 1);
01240 }
01241
01242 inline void LhxHashKeyAccessor::setMatched(bool matched)
01243 {
01244 *(getCurrent() + isMatchedOffset) = (matched ? 0x01 : 0);
01245 }
01246
01247 inline void LhxHashKeyAccessor::checkStorageSize(
01248 TupleData const &inputTuple,
01249 uint maxBufferSize)
01250 {
01251 uint storageSize = getStorageSize(inputTuple);
01252
01253 if (storageSize > maxBufferSize) {
01254 throw TupleOverflowExcn(
01255 keyDescriptor,
01256 inputTuple,
01257 storageSize,
01258 maxBufferSize);
01259 }
01260 }
01261
01262 inline void LhxHashBlockAccessor::reset()
01263 {
01264 LhxHashNodeAccessor::reset();
01265 freePtr = endPtr = NULL;
01266 }
01267
01268 inline void LhxHashBlockAccessor::allocSlots(uint slotCount)
01269 {
01270
01271
01272
01273 PBuffer bufPtr = allocBuffer(slotCount * sizeof(PBuffer));
01274 assert (bufPtr != NULL);
01275 }
01276
01277 inline uint LhxHashTable::slotsNeeded(RecordNum cndKeys)
01278 {
01279 RecordNum cKeys = RecordNum(ceil(cndKeys * 1.2));
01280 if (cKeys >= uint(MAXU)) {
01281 return uint(MAXU) - 1;
01282 } else {
01283 return uint(cKeys);
01284 }
01285 }
01286
01287 inline uint LhxHashTable::getNumSlots() const
01288 {
01289 return numSlots;
01290 }
01291
01292 inline PBuffer *LhxHashTable::getFirstSlot() const
01293 {
01294 return firstSlot;
01295 }
01296
01297 inline PBuffer *LhxHashTable::getNextSlot(PBuffer *curSlot)
01298 {
01299 hashKeyAccessor.setCurrent((*curSlot), true);
01300 return hashKeyAccessor.getNextSlot();
01301 }
01302
01303 inline bool LhxHashTable::isHashGroupBy() const
01304 {
01305 return isGroupBy;
01306 }
01307
01308 inline LhxHashTable *LhxHashTableReader::getHashTable()
01309 {
01310 return hashTable;
01311 }
01312
01313 inline void LhxHashTableReader::bind(PBuffer key)
01314 {
01315 boundKey = curKey = key;
01316 curSlot = NULL;
01317 curData = NULL;
01318 isPositioned = false;
01319 }
01320
01321 inline void LhxHashTableReader::bindKey(PBuffer key)
01322 {
01323 returnUnMatched = false;
01324 bind(key);
01325 }
01326
01327 inline void LhxHashTableReader::bindUnMatched()
01328 {
01329 returnUnMatched = true;
01330 bind(NULL);
01331 }
01332
01333 FENNEL_END_NAMESPACE
01334
01335 #endif
01336
01337