00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "fennel/common/CommonPreamble.h"
00023 #include "fennel/lucidera/colstore/LcsHash.h"
00024
00025 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsHash.cpp#13 $");
00026
00027
00028
00029
/**
 * Mixing table for the Pearson-style byte hash in LcsHash::computeKey().
 * For the per-byte mixing step to be a bijection (and the hash keys to be
 * well distributed), this table must be a permutation of the values
 * 0..255.  The previous copy of the table accidentally duplicated the
 * values 226, 236, 142 and 38 and dropped 214, 244, 227 and 149; it has
 * been restored to the canonical permutation from Pearson's paper
 * ("Fast Hashing of Variable-Length Text Strings", CACM 1990).
 */
static uint8_t MagicTable[256] =
{
    1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84,
    249, 230, 44, 163, 14, 197, 213, 181, 161, 85, 218, 80,
    64, 239, 24, 226, 236, 142, 38, 200, 110, 177, 104, 103,
    141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
    25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214,
    244, 227, 149, 235, 97, 234, 57, 22, 60, 250, 82, 175,
    208, 5, 127, 199, 111, 62, 135, 248, 174, 169, 211, 58,
    66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
    132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13,
    153, 246, 216, 219, 119, 68, 223, 78, 83, 88, 201, 99,
    122, 11, 92, 32, 136, 114, 52, 10, 138, 30, 48, 183,
    156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
    170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220,
    90, 53, 23, 131, 125, 173, 15, 238, 79, 95, 89, 16,
    105, 137, 225, 224, 217, 160, 37, 123, 118, 73, 2, 157,
    46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
    27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247,
    19, 205, 39, 203, 233, 40, 186, 147, 198, 192, 155, 33,
    164, 191, 98, 204, 165, 180, 117, 76, 140, 36, 210, 172,
    41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
    51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43,
    71, 109, 184, 209
};
00055
00056
00057
00058
00059
00060
00061 LcsHash::LcsHash()
00062 {
00063 columnId = 0;
00064 valCnt = 0;
00065 maxValueSize = 0;
00066 magicTable = MagicTable;
00067 numMatches = 0;
00068 numChecks = 0;
00069 }
00070
/**
 * Initializes this hash for one column of a cluster.
 *
 * @param hashBlockInit scratch buffer backing the hash table
 * @param clusterBlockWriterInit writer for the cluster block being built
 * @param colTupleDescInit descriptor of the column; must contain exactly
 *        one attribute (asserted below)
 * @param columnIdInit ordinal of this column within the cluster
 * @param blockSizeInit size in bytes of hashBlockInit
 */
void LcsHash::init(
    PBuffer hashBlockInit,
    SharedLcsClusterNodeWriter clusterBlockWriterInit,
    TupleDescriptor const &colTupleDescInit,
    uint columnIdInit,
    uint blockSizeInit)
{
    // Zero the scratch block before handing it to the hash table so all
    // entries and value nodes start out empty.
    memset(hashBlockInit,0,blockSizeInit);
    hash.init(hashBlockInit, blockSizeInit);

    columnId = columnIdInit;
    valCnt = 0;
    maxValueSize = 0;
    clusterBlockWriter = clusterBlockWriterInit;

    // This hash handles exactly one column.
    assert(colTupleDescInit.size() == 1);
    colTupleDesc = colTupleDescInit;
    attrAccessor.compute(colTupleDesc[0]);

    // Pre-allocate the tuple instances used for insert and lookup so no
    // allocation happens per row.
    colTuple.computeAndAllocate(colTupleDesc);
    searchTuple.computeAndAllocate(colTupleDesc);

    // Scratch buffer large enough for any stored (length-prefixed) value
    // of this column; used by the TupleDatum overloads of
    // insert()/undoInsert().
    colTupleBuffer.reset(
        new FixedBuffer[attrAccessor.getMaxByteCount()]);

    // Comparator used later when sorting value ordinals for a
    // compressed batch.
    compareInst = SharedLcsCompareColKeyUsingOffsetIndex(
        new LcsCompareColKeyUsingOffsetIndex(
            clusterBlockWriter, &hash, colTupleDesc, columnId, attrAccessor));
}
00112
00113 void LcsHash::insert(
00114 TupleDatum &colTupleDatum,
00115 LcsHashValOrd *valOrd,
00116 bool *undoInsert)
00117 {
00118
00119
00120
00121 PBuffer dataWithLen = colTupleBuffer.get();
00122 attrAccessor.storeValue(colTupleDatum, dataWithLen);
00123
00124 insert(dataWithLen, valOrd, undoInsert);
00125 }
00126
00127
/**
 * Inserts a stored (length-prefixed) column value into the hash and into
 * the cluster block.
 *
 * @param dataWithLen value in stored format
 * @param valOrd [out] ordinal assigned to (or already held by) the value
 * @param undoInsert [out] true if the value could not be added (hash
 *        full or block writer out of space); the caller is expected to
 *        call undoInsert() in that case
 *
 * Every path through this function records its action in the "undo"
 * member so a subsequent undoInsert() can roll it back.
 */
void LcsHash::insert(
    PBuffer dataWithLen,
    LcsHashValOrd *valOrd,
    bool *undoInsert)
{
    uint key;
    uint16_t newValueOffset;
    LcsHashValueNode *vPtr = 0;
    TupleStorageByteLength storageLength;

    // In no-compress mode values are never deduplicated, so the hash key
    // is not needed; use 0 and skip the search below.
    bool noCompress = clusterBlockWriter->noCompressMode(columnId);
    key = noCompress ? 0 : computeKey(dataWithLen);

    *undoInsert = false;

    // Value not seen before (or dedup disabled): add it to the block and
    // create a new hash node for it.
    if (noCompress || !search(key, dataWithLen, valOrd, &vPtr)) {
        LcsHashValueNode *newNode;

        // The insert fails if the hash has no free value nodes or the
        // block writer cannot store the value bytes.
        *undoInsert =
            hash.isFull() ||
            !clusterBlockWriter->addValue(
                columnId, dataWithLen, &newValueOffset);

        if (*undoInsert) {
            // Nothing was changed; record that so undoInsert() only has
            // to tell the block writer to forget the attempt.
            undo.set(NOTHING, key, maxValueSize, 0);
            return;
        }

        // Claim a value node, give the value the next ordinal, and mark
        // it as present in the current batch.
        newNode = hash.getNewValueNode();
        newNode->valueOffset = newValueOffset;
        *valOrd = valCnt ++;
        valOrd->setValueInBatch();
        newNode->valueOrd = *valOrd;

        hash.insertNewValueNode(key, newNode);

        // Record enough state (key, previous maxValueSize) to remove the
        // node again on rollback.
        undo.set(NEWENTRY, key, maxValueSize, 0);

        storageLength = attrAccessor.getStoredByteCount(dataWithLen);

        if (storageLength > maxValueSize) {
            maxValueSize = storageLength;
        }
    } else {
        // Value already known: only batch bookkeeping is needed.  If this
        // is its first occurrence in the current batch, the block writer
        // must account for one more distinct value.
        bool bFirstTimeInBatch = !valOrd->isValueInBatch();

        *undoInsert =
            !clusterBlockWriter->addValue(columnId, bFirstTimeInBatch);

        if (*undoInsert) {
            // Nothing was changed.
            undo.set(NOTHING, key, maxValueSize, 0);
            return;
        }

        (vPtr->valueOrd).setValueInBatch();
        *valOrd = vPtr->valueOrd;

        // Only a first-in-batch occurrence has state to roll back (the
        // in-batch flag on the value node).
        if (bFirstTimeInBatch) {
            undo.set(NEWBATCHVALUE, key, maxValueSize, vPtr);
        } else {
            undo.set(NOTHING, key, maxValueSize, 0);
        }
    }
}
00239
00240 void LcsHash::undoInsert(TupleDatum &colTupleDatum)
00241 {
00242
00243
00244
00245 PBuffer dataWithLen = colTupleBuffer.get();
00246 attrAccessor.storeValue(colTupleDatum, dataWithLen);
00247
00248 undoInsert(dataWithLen);
00249
00250 }
00251
/**
 * Rolls back the most recent insert(), using the action recorded in the
 * "undo" member by that insert.
 *
 * @param dataWithLen the same stored value that was passed to insert();
 *        only needed for the NEWENTRY case, where the block writer must
 *        remove the value bytes it stored
 */
void LcsHash::undoInsert(PBuffer dataWithLen)
{
    switch (undo.what) {
    case NOTHING:
        {
            // Hash state was untouched; only the block writer's row
            // accounting needs to be reverted.
            clusterBlockWriter->undoValue(columnId, NULL, false);
            break;
        }
    case NEWENTRY:
        {
            // A brand-new value was added: give back its ordinal, remove
            // its hash node, restore the previous max value size, and
            // have the block writer drop the stored bytes.
            valCnt--;
            hash.undoNewValueNode(undo.key);
            maxValueSize = undo.origMaxValueSize;
            clusterBlockWriter->undoValue(columnId, dataWithLen, true);
            break;
        }
    case NEWBATCHVALUE:
        {
            // An existing value was marked first-in-batch: revert the
            // writer's distinct-value accounting and clear the in-batch
            // flag on the value node saved by insert().
            clusterBlockWriter->undoValue(columnId, NULL, true);
            (&undo.vPtr->valueOrd)->clearValueInBatch();
            break;
        }
    }
    undo.reset();
}
00294
/**
 * Searches the hash chain for "key" looking for a value equal to the one
 * in dataWithLen.
 *
 * @param key hash key of the value (from computeKey())
 * @param dataWithLen value in stored format to look for
 * @param valOrd [out] ordinal of the matching value, if found
 * @param vNode [out, optional] matching value node, if found
 * @return true if an equal value is already in the hash
 *
 * NOTE(review): compareRes is declared bool but holds the int result of
 * compareTuples(); "compareRes == 0" therefore means "tuples equal",
 * which works but reads oddly — consider int.  Left unchanged here.
 */
bool LcsHash::search(
    uint key,
    PBuffer dataWithLen,
    LcsHashValOrd *valOrd,
    LcsHashValueNode **vNode = 0)
{
    LcsHashValueNode *valueNode;
    bool compareRes;

    attrAccessor.loadValue(colTuple[0], dataWithLen);

    for (valueNode = hash.getFirstValueNode(key);
        valueNode != NULL;
        valueNode = hash.getNextValueNode(valueNode))
    {
        numChecks++;

        // A zero offset marks a node whose offset was cleared by
        // clearFixedEntries(); it no longer refers to stored bytes, so
        // skip it.
        if (valueNode->valueOffset == 0) {
            continue;
        }

        attrAccessor.loadValue(
            searchTuple[0],
            clusterBlockWriter->getOffsetPtr(columnId, valueNode->valueOffset));

        compareRes = colTupleDesc.compareTuples(colTuple, searchTuple);

        // Detach searchTuple from the block data before the next
        // iteration reuses it.
        searchTuple.resetBuffer();

        if (compareRes == 0) {
            numMatches++;
            *valOrd = valueNode->valueOrd;
            if (vNode) {
                *vNode = valueNode;
            }
            colTuple.resetBuffer();
            return true;
        }
    }

    colTuple.resetBuffer();

    // No equal value found on this chain.
    return false;
}
00348
00349
/**
 * Prepares a compressed batch: collects the distinct values used in this
 * batch, sorts them by column value, and rewrites the per-row ordinals
 * into sorted ordinals.
 *
 * @param rowArray [in/out] array of 16-bit value ordinals, one per row;
 *        rewritten in place to sorted ordinals
 * @param numRows number of rows in the batch
 * @param numVals [out] number of distinct values in the batch
 * @param offsetIndexVector [out] on return, the value offsets in sorted
 *        value order
 *
 * NOTE(review): the loop index i is uint16_t while valCnt is uint —
 * assumes valCnt always fits in 16 bits (plausible since ordinals are
 * stored in uint16_t row entries); TODO confirm.
 */
void LcsHash::prepareCompressedBatch(
    uint8_t *rowArray,
    uint numRows,
    uint16_t *numVals,
    uint16_t *offsetIndexVector)
{
    uint16_t i;
    uint16_t *rowWORDArray=(uint16_t*)rowArray;
    *numVals = 0;

    // Pass 1: gather the ordinals of all values present in this batch
    // into offsetIndexVector and count them.
    for (i = 0; i < valCnt; i++) {
        if ((hash.valueNodes[i].valueOrd).isValueInBatch()) {
            hash.valueNodes[i].sortedOrd = *numVals;
            offsetIndexVector[(*numVals)++] = i;
        }
    }

    // Sort the collected ordinals by the column values they refer to.
    std::sort(
        offsetIndexVector,
        offsetIndexVector + (*numVals),
        LcsCompare(compareInst));

    // Pass 2: record each value node's position in the sorted order.
    for (i = 0; i < *numVals; i++) {
        hash.valueNodes[offsetIndexVector[i]].sortedOrd = i;
    }

    // Pass 3: replace the sorted ordinals in offsetIndexVector with the
    // corresponding value offsets, which is what the batch stores.
    for (i = 0; i < *numVals; i++) {
        offsetIndexVector[i] =
            hash.valueNodes[offsetIndexVector[i]].valueOffset;
    }

    // Pass 4: translate each row's value ordinal into its sorted
    // ordinal.
    for (i = 0; i < numRows; i++) {
        rowWORDArray[i] = hash.valueNodes[rowWORDArray[i]].sortedOrd;
    }
}
00406
00407 void LcsHash::prepareFixedOrVariableBatch(
00408 uint8_t *rowArray,
00409 uint numRows)
00410 {
00411 uint i;
00412 uint16_t *rowWORDArray=(uint16_t*)rowArray;
00413 LcsHashValueNode *pValueNodes;
00414
00415 pValueNodes = hash.valueNodes;
00416
00417
00418
00419
00420 for (i = 0; i < numRows; i++) {
00421 rowWORDArray[i] = pValueNodes[rowWORDArray[i]].valueOffset;
00422 }
00423 }
00424
00425
00426 void LcsHash::clearFixedEntries()
00427 {
00428
00429
00430
00431 if (!clusterBlockWriter->noCompressMode(columnId)) {
00432 for (uint i = 0; i < valCnt; i++) {
00433 if ((hash.valueNodes[i].valueOrd).isValueInBatch()) {
00434 hash.valueNodes[i].valueOffset = 0;
00435 }
00436 }
00437 }
00438 }
00439
/**
 * Rebuilds the hash from values already stored in the cluster block,
 * walking the block's value chain starting at lastValOff.  Used to
 * repopulate the hash after it has been reset.
 *
 * @param numVals number of values to restore
 * @param lastValOff offset of the first value in the chain
 */
void LcsHash::restore(uint numVals, uint16_t lastValOff)
{
    uint i;
    uint key;
    PBuffer dataWithLen;
    LcsHashValueNode *newNode;
    LcsHashValOrd dummy;
    LcsHashValOrd valOrd;
    TupleStorageByteLength storageLength;

    // As in insert(): no hashing/dedup in no-compress mode.
    bool noCompress = clusterBlockWriter->noCompressMode(columnId);

    // Stop early if the hash runs out of value nodes.
    for (i = 0; i < numVals && !(hash.isFull()); i++) {
        dataWithLen = clusterBlockWriter->getOffsetPtr(columnId,lastValOff);
        key = noCompress ? 0 : computeKey(dataWithLen);

        // Only create a node if the value is not already in the hash.
        // Note that, unlike insert(), restored values are NOT marked as
        // in-batch.
        if (noCompress || !search(key, dataWithLen, &dummy)) {
            newNode = hash.getNewValueNode();

            valOrd = valCnt++;
            newNode->valueOrd = valOrd;
            newNode->valueOffset = (uint16_t) lastValOff;

            hash.insertNewValueNode(key, newNode);

            storageLength = attrAccessor.getStoredByteCount(dataWithLen);
            if (storageLength > maxValueSize) {
                maxValueSize = storageLength;
            }
        }

        // Follow the chain to the next stored value.
        lastValOff = clusterBlockWriter->getNextVal(
            columnId,
            (uint16_t)lastValOff);
    }
}
00487
00488 void LcsHash::startNewBatch(uint leftOvers)
00489 {
00490
00491
00492
00493
00494 if (clusterBlockWriter->noCompressMode(columnId) ||
00495 hash.isFull(leftOvers))
00496 {
00497 hash.resetHash();
00498 valCnt = 0;
00499 maxValueSize = 0;
00500 } else {
00501 hash.resetBatch();
00502 }
00503 }
00504
00505
00506
00507
00508
00509
00510 uint LcsHash::computeKey(PBuffer dataWithLen)
00511 {
00512 uint8_t keyVal[2] = {0,0}, oldKeyVal[2]={0,17};
00513 uint i, colSize = attrAccessor.getStoredByteCount(dataWithLen);
00514
00515
00516
00517
00518
00519 for (i = 0;
00520 i < colSize;
00521 oldKeyVal[0] = keyVal[0], oldKeyVal[1] = keyVal[1], i++, dataWithLen++)
00522 {
00523 keyVal[0] = magicTable[oldKeyVal[0] ^ *dataWithLen];
00524 keyVal[1] = magicTable[oldKeyVal[1] ^ *dataWithLen];
00525 }
00526
00527 return ((keyVal[1]<<8) + keyVal[0]) % hash.numHashEntries();
00528 }
00529
00530
00531
00532
00533
00534
/**
 * Carves the supplied scratch block into the hash table's two arrays:
 * a uint16_t entry array (chain heads) followed by an array of
 * LcsHashValueNode, sized so both fit in the block.
 *
 * @param hashBlockInit scratch buffer to carve up
 * @param hashBlockSizeInit size in bytes of hashBlockInit
 */
void LcsHashTable::init(PBuffer hashBlockInit, uint hashBlockSizeInit)
{
    hashBlock = hashBlockInit;
    hashBlockSize = hashBlockSizeInit;
    memset(hashBlock, 0, hashBlockSize);

    // One entry slot plus one value node per hash entry; integer
    // division leaves any remainder unused.
    hashTableSize = hashBlockSize /
        (sizeof(uint16_t) + sizeof(LcsHashValueNode));

    // Entry array lives at the start of the block.
    entry = (uint16_t *)hashBlock;

    // Value nodes follow immediately after the entry array.
    valueNodes = reinterpret_cast<LcsHashValueNode *>(
        hashBlock + sizeof(uint16_t) * hashTableSize);

    // Next free value node to hand out.
    nextValueNode = 0;
}
00564
00565
00566 LcsCompareColKeyUsingOffsetIndex::LcsCompareColKeyUsingOffsetIndex(
00567 SharedLcsClusterNodeWriter clusterBlockWriterInit,
00568 LcsHashTable *hashTableInit,
00569 TupleDescriptor const &colTupleDescInit,
00570 uint columnIdInit,
00571 UnalignedAttributeAccessor const &attrAccessorInit)
00572 {
00573 hashTable = hashTableInit;
00574 clusterBlockWriter = clusterBlockWriterInit;
00575 columnId = columnIdInit;
00576 colTupleDesc = colTupleDescInit;
00577 colTuple1.computeAndAllocate(colTupleDesc);
00578 colTuple2.computeAndAllocate(colTupleDesc);
00579 attrAccessor = attrAccessorInit;
00580
00581
00582
00583
00584 assert((colTuple1.size() == 1) && (colTuple2.size() == 1));
00585 }
00586
00587
00588 bool LcsCompareColKeyUsingOffsetIndex::lessThan(
00589 const uint16_t colKeyOffsetIndex1,
00590 const uint16_t colKeyOffsetIndex2)
00591 {
00592 bool isLessThan = false;
00593
00594
00595
00596
00597
00598 attrAccessor.loadValue(
00599 colTuple1[0],
00600 clusterBlockWriter->getOffsetPtr(
00601 columnId, hashTable->valueNodes[colKeyOffsetIndex1].valueOffset));
00602
00603 attrAccessor.loadValue(
00604 colTuple2[0],
00605 clusterBlockWriter->getOffsetPtr(
00606 columnId, hashTable->valueNodes[colKeyOffsetIndex2].valueOffset));
00607
00608
00609
00610
00611
00612 isLessThan = colTupleDesc.compareTuples(colTuple1, colTuple2) < 0;
00613
00614 colTuple1.resetBuffer();
00615 colTuple2.resetBuffer();
00616
00617 return (isLessThan);
00618 }
00619
00620 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsHash.cpp#13 $");
00621
00622