00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "fennel/common/CommonPreamble.h"
00025 #include "fennel/test/SegStorageTestBase.h"
00026 #include "fennel/btree/BTreeBuilder.h"
00027 #include "fennel/btree/BTreeVerifier.h"
00028 #include "fennel/btree/BTreeReader.h"
00029 #include "fennel/btree/BTreeWriter.h"
00030 #include "fennel/segment/SegInputStream.h"
00031 #include "fennel/segment/SegOutputStream.h"
00032 #include "fennel/tuple/StandardTypeDescriptor.h"
00033 #include "fennel/tuple/TupleAccessor.h"
00034
00035 #include <boost/test/test_tools.hpp>
00036
00037 using namespace fennel;
00038
00039 #include <functional>
00040
00041 class BTreeTest : virtual public SegStorageTestBase
00042 {
00043 enum {
00044 nRandomSeed = 1000000,
00045 iValueMax = 1000000000
00046 };
00047
00048 enum InsertType {
00049 MONOTONIC,
00050 SEQUENTIAL,
00051 RANDOM
00052 };
00053
00054 struct Record
00055 {
00056 int32_t key;
00057 int32_t secondKey;
00058 int32_t value;
00059 };
00060
00061 BTreeDescriptor descriptor;
00062
00063 TupleAccessor tupleAccessor;
00064 TupleData keyData;
00065 TupleData tupleData;
00066 Record record;
00067 boost::scoped_array<FixedBuffer> recordBuf;
00068
00069 int32_t readKey();
00070 int32_t readSecondKey();
00071 int32_t readValue();
00072 int32_t readMultiKeyValue();
00073 void verifyTree(uint nRecordsExpected,uint nLevelsExpected);
00074
00075 void testBulkLoadOneLevelNewRoot()
00076 {
00077 testBulkLoad(200,1,true);
00078 }
00079
00080 void testBulkLoadOneLevelReuseRoot()
00081 {
00082 testBulkLoad(200,1,false);
00083 }
00084
00085 void testBulkLoadTwoLevelsNewRoot()
00086 {
00087 testBulkLoad(20000,2,true);
00088 }
00089
00090 void testBulkLoadTwoLevelsReuseRoot()
00091 {
00092 testBulkLoad(20000,2,false);
00093 }
00094
00095 void testBulkLoadThreeLevels()
00096 {
00097 testBulkLoad(200000,3,true);
00098 }
00099
00100 void testBulkLoad(uint nRecords,uint nLevelsExpected,bool newRoot);
00101 void testScan(
00102 SharedByteInputStream,
00103 uint nRecords,
00104 bool alternating,
00105 bool deletion);
00106 void testSearch(SharedByteInputStream,uint nRecords,bool leastUpper);
00107 void testSearchLast();
00108
00109 void testSequentialInserts()
00110 {
00111 testInserts(SEQUENTIAL);
00112 }
00113
00114 void testRandomInserts()
00115 {
00116 testInserts(RANDOM);
00117 }
00118
00119 void testMonotonicInserts()
00120 {
00121 testInserts(MONOTONIC);
00122 }
00123
00124 void testInserts(InsertType insertType);
00125
00126 void marshalRecord();
00127 void marshalMultiKeyRecord();
00128 void unmarshalRecord(SharedByteInputStream pInputStream);
00129
00130 void testMultiKeySearches(uint nKey1, uint nKey2);
00131
00132 void testSmallMultiKeySearches()
00133 {
00134
00135 testMultiKeySearches(200, 4);
00136 }
00137
00138 void testBigMultiKeySearches()
00139 {
00140
00141
00142 testMultiKeySearches(4, 20000);
00143 }
00144
00145 public:
00146 explicit BTreeTest()
00147 {
00148 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadOneLevelNewRoot);
00149 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadOneLevelReuseRoot);
00150 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadTwoLevelsNewRoot);
00151 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadTwoLevelsReuseRoot);
00152 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadThreeLevels);
00153 FENNEL_UNIT_TEST_CASE(BTreeTest,testSequentialInserts);
00154 FENNEL_UNIT_TEST_CASE(BTreeTest,testRandomInserts);
00155 FENNEL_UNIT_TEST_CASE(BTreeTest,testMonotonicInserts);
00156
00157
00158
00159
00160 FENNEL_UNIT_TEST_CASE(BTreeTest,testSmallMultiKeySearches);
00161 FENNEL_UNIT_TEST_CASE(BTreeTest,testBigMultiKeySearches);
00162
00163 StandardTypeDescriptorFactory stdTypeFactory;
00164 TupleAttributeDescriptor attrDesc(
00165 stdTypeFactory.newDataType(STANDARD_TYPE_INT_32));
00166 descriptor.tupleDescriptor.push_back(attrDesc);
00167 descriptor.tupleDescriptor.push_back(attrDesc);
00168 descriptor.keyProjection.push_back(0);
00169 tupleAccessor.compute(descriptor.tupleDescriptor);
00170 recordBuf.reset(new FixedBuffer[tupleAccessor.getMaxByteCount()]);
00171 tupleData.compute(descriptor.tupleDescriptor);
00172 }
00173
00174 virtual void testCaseSetUp();
00175 virtual void testCaseTearDown();
00176 };
00177
00178 void BTreeTest::testCaseSetUp()
00179 {
00180 openStorage(DeviceMode::createNew);
00181 openRandomSegment();
00182
00183 descriptor.segmentAccessor.pSegment = pRandomSegment;
00184 descriptor.segmentAccessor.pCacheAccessor = pCache;
00185 }
00186
00187 void BTreeTest::testCaseTearDown()
00188 {
00189 descriptor.segmentAccessor.reset();
00190 SegStorageTestBase::testCaseTearDown();
00191 }
00192
00193 void BTreeTest::marshalRecord()
00194 {
00195 tupleData[0].pData = reinterpret_cast<PBuffer>(&record.key);
00196 tupleData[1].pData = reinterpret_cast<PBuffer>(&record.value);
00197 tupleAccessor.marshal(tupleData, recordBuf.get());
00198 }
00199
00200 void BTreeTest::marshalMultiKeyRecord()
00201 {
00202 tupleData[0].pData = reinterpret_cast<PBuffer>(&record.key);
00203 tupleData[1].pData = reinterpret_cast<PBuffer>(&record.secondKey);
00204 tupleData[2].pData = reinterpret_cast<PBuffer>(&record.value);
00205 tupleAccessor.marshal(tupleData, recordBuf.get());
00206 }
00207
00208 void BTreeTest::unmarshalRecord(SharedByteInputStream pInputStream)
00209 {
00210 PConstBuffer pBuf = pInputStream->getReadPointer(1);
00211 tupleAccessor.setCurrentTupleBuf(pBuf);
00212 uint cbTuple = tupleAccessor.getCurrentByteCount();
00213 tupleAccessor.unmarshal(tupleData);
00214 record.key = readKey();
00215 record.value = readValue();
00216 pInputStream->consumeReadPointer(cbTuple);
00217 }
00218
00219 void BTreeTest::testBulkLoad(uint nRecords,uint nLevelsExpected,bool newRoot)
00220 {
00221 BlockNum nPagesAllocatedInitially =
00222 pRandomSegment->getAllocatedSizeInPages();
00223
00224 descriptor.rootPageId = NULL_PAGE_ID;
00225 BTreeBuilder builder(descriptor,pRandomSegment);
00226
00227 keyData.compute(builder.getKeyDescriptor());
00228 keyData[0].pData = reinterpret_cast<PConstBuffer>(&record.key);
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239 if (!newRoot) {
00240 builder.createEmptyRoot();
00241 descriptor.rootPageId = builder.getRootPageId();
00242 }
00243
00244
00245 SegmentAccessor segmentAccessor(pRandomSegment,pCache);
00246 SharedSegOutputStream pOutputStream =
00247 SegOutputStream::newSegOutputStream(segmentAccessor);
00248 std::subtractive_rng randomNumberGenerator(nRandomSeed);
00249 record.key = 0;
00250 for (uint i = 0; i < nRecords; i++) {
00251
00252 record.key += (randomNumberGenerator(10)) + 2;
00253 record.value = randomNumberGenerator(iValueMax);
00254
00255 marshalRecord();
00256 uint cbTuple = tupleAccessor.getCurrentByteCount();
00257 PBuffer pBuffer = pOutputStream->getWritePointer(cbTuple);
00258 memcpy(pBuffer,recordBuf.get(),cbTuple);
00259 pOutputStream->consumeWritePointer(cbTuple);
00260 }
00261 PageId pageId = pOutputStream->getFirstPageId();
00262 pOutputStream.reset();
00263 SharedSegInputStream pInputStream =
00264 SegInputStream::newSegInputStream(segmentAccessor,pageId);
00265 SegStreamPosition startPos;
00266 pInputStream->getSegPos(startPos);
00267
00268
00269 builder.build(*pInputStream,nRecords,1.0);
00270 descriptor.rootPageId = builder.getRootPageId();
00271
00272
00273 verifyTree(nRecords,nLevelsExpected);
00274
00275
00276 pInputStream->seekSegPos(startPos);
00277 testSearch(pInputStream,nRecords,true);
00278
00279
00280
00281 pInputStream->seekSegPos(startPos);
00282 testSearch(pInputStream,nRecords,false);
00283
00284
00285 pInputStream->seekSegPos(startPos);
00286 testScan(pInputStream,nRecords,false,false);
00287
00288
00289 pInputStream->seekSegPos(startPos);
00290 testScan(pInputStream,nRecords,true,true);
00291
00292
00293 verifyTree(nRecords / 2,nLevelsExpected);
00294
00295
00296 pInputStream->seekSegPos(startPos);
00297 testScan(pInputStream,nRecords,true,false);
00298
00299
00300 pInputStream->seekSegPos(startPos);
00301 pInputStream->setDeallocate(true);
00302 pInputStream.reset();
00303
00304
00305 builder.truncate(true);
00306
00307
00308 BOOST_CHECK_EQUAL(
00309 pRandomSegment->getAllocatedSizeInPages(),
00310 nPagesAllocatedInitially);
00311 }
00312
00313 void BTreeTest::verifyTree(uint nRecordsExpected,uint nLevelsExpected)
00314 {
00315 BTreeVerifier verifier(descriptor);
00316 verifier.verify();
00317
00318 BTreeStatistics const &stats = verifier.getStatistics();
00319 BOOST_CHECK_EQUAL(stats.nLevels,nLevelsExpected);
00320 BOOST_CHECK_EQUAL(stats.nTuples,nRecordsExpected);
00321 }
00322
00323 void BTreeTest::testSearch(
00324 SharedByteInputStream pInputStream,uint nRecords,bool leastUpper)
00325 {
00326 BTreeReader reader(descriptor);
00327 for (uint i = 0; i < nRecords; ++i) {
00328 unmarshalRecord(pInputStream);
00329 if (!reader.searchForKey(keyData,DUP_SEEK_ANY,leastUpper)) {
00330 BOOST_FAIL("LeastUpper:" << leastUpper <<
00331 ". Could not find key #" << i << ": " << record.key);
00332 }
00333 reader.getTupleAccessorForRead().unmarshal(tupleData);
00334 BOOST_CHECK_EQUAL(record.key,readKey());
00335 BOOST_CHECK_EQUAL(record.value,readValue());
00336 if (!(i % 10000)) {
00337 BOOST_MESSAGE(
00338 "found value = " << readValue()
00339 << " key = " << readKey());
00340 }
00341 record.key++;
00342 if (reader.searchForKey(keyData,DUP_SEEK_ANY)) {
00343 BOOST_FAIL("Found key " << record.key << " (shouldn't exist!)");
00344 }
00345 }
00346 }
00347
00348 void BTreeTest::testScan(
00349 SharedByteInputStream pInputStream,uint nRecords,
00350 bool alternating,bool deletion)
00351 {
00352 BTreeReader realReader(descriptor);
00353
00354 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment(
00355 pCache,
00356 1);
00357
00358 BTreeWriter writer(descriptor,scratchAccessor);
00359
00360 bool found;
00361 int32_t lastKey = -1;
00362 BTreeReader &reader = deletion ? writer : realReader;
00363 found = reader.searchFirst();
00364 if (!found) {
00365 BOOST_FAIL("searchFirst found nothing");
00366 }
00367 if (alternating && !deletion) {
00368 nRecords /= 2;
00369 }
00370 for (uint i = 0; i < nRecords; ++i) {
00371 unmarshalRecord(pInputStream);
00372 if (alternating && !deletion) {
00373 unmarshalRecord(pInputStream);
00374 }
00375 if (!found) {
00376 BOOST_FAIL("Could not searchNext for key #"
00377 << i << ": " << record.key);
00378 }
00379 reader.getTupleAccessorForRead().unmarshal(tupleData);
00380 lastKey = readKey();
00381 BOOST_CHECK_EQUAL(record.key,lastKey);
00382 BOOST_CHECK_EQUAL(record.value,readValue());
00383 if (!(i % 10000)) {
00384 BOOST_MESSAGE(
00385 "scanned value = " << readValue()
00386 << " key = " << lastKey);
00387 }
00388 if (deletion) {
00389 if (!alternating || !(i & 1)) {
00390 writer.deleteCurrent();
00391 }
00392 }
00393 found = reader.searchNext();
00394 }
00395
00396 reader.endSearch();
00397
00398 if (!deletion) {
00399 found = reader.searchLast();
00400 if (!found) {
00401 BOOST_FAIL("searchLast found nothing");
00402 }
00403 reader.getTupleAccessorForRead().unmarshal(tupleData);
00404 BOOST_CHECK_EQUAL(lastKey,readKey());
00405 reader.endSearch();
00406 }
00407 }
00408
00409 void BTreeTest::testSearchLast()
00410 {
00411 BTreeReader reader(descriptor);
00412 bool found = reader.searchLast();
00413 if (!found) {
00414 BOOST_FAIL("searchLast found nothing");
00415 }
00416 }
00417
00418 int32_t BTreeTest::readKey()
00419 {
00420 return *reinterpret_cast<int32_t const *>(
00421 tupleData[0].pData);
00422 }
00423
00424 int32_t BTreeTest::readSecondKey()
00425 {
00426 return *reinterpret_cast<int32_t const *>(
00427 tupleData[1].pData);
00428 }
00429
00430 int32_t BTreeTest::readValue()
00431 {
00432 return *reinterpret_cast<int32_t const *>(
00433 tupleData[1].pData);
00434 }
00435
00436 int32_t BTreeTest::readMultiKeyValue()
00437 {
00438 return *reinterpret_cast<int32_t const *>(
00439 tupleData[2].pData);
00440 }
00441
00442 void BTreeTest::testInserts(InsertType insertType)
00443 {
00444 uint nRecords = 200000;
00445 descriptor.rootPageId = NULL_PAGE_ID;
00446 BTreeBuilder builder(descriptor,pRandomSegment);
00447
00448 keyData.compute(builder.getKeyDescriptor());
00449 keyData[0].pData = reinterpret_cast<PConstBuffer>(&record.key);
00450
00451 tupleData.compute(descriptor.tupleDescriptor);
00452
00453 builder.createEmptyRoot();
00454 descriptor.rootPageId = builder.getRootPageId();
00455
00456 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment(
00457 pCache,
00458 1);
00459
00460 bool monotonic = (insertType == MONOTONIC);
00461 BTreeWriter writer(descriptor,scratchAccessor,monotonic);
00462
00463 std::vector<int32_t> keys;
00464 for (uint i = 0; i < nRecords; i++) {
00465 keys.push_back(i);
00466 }
00467 if (insertType == RANDOM) {
00468 std::random_shuffle(keys.begin(), keys.end());
00469 }
00470
00471
00472
00473 for (uint i = 0; i < nRecords; i++) {
00474 record.key = keys[i];
00475 record.value = -keys[i];
00476 marshalRecord();
00477 writer.insertTupleFromBuffer(
00478 recordBuf.get(), DUP_FAIL);
00479 }
00480
00481 BTreeReader reader(descriptor);
00482 for (uint i = 0; i < nRecords; ++i) {
00483 record.key = i;
00484 record.value = -i;
00485 if (!reader.searchForKey(keyData,DUP_SEEK_ANY)) {
00486 BOOST_FAIL("Could not find key #" << i << ": " << record.key);
00487 }
00488 reader.getTupleAccessorForRead().unmarshal(tupleData);
00489 BOOST_CHECK_EQUAL(record.key,readKey());
00490 BOOST_CHECK_EQUAL(record.value,readValue());
00491 }
00492 }
00493
00494 void BTreeTest::testMultiKeySearches(uint nKey1, uint nKey2)
00495 {
00496
00497 StandardTypeDescriptorFactory stdTypeFactory;
00498 TupleAttributeDescriptor attrDesc(
00499 stdTypeFactory.newDataType(STANDARD_TYPE_INT_32));
00500 descriptor.tupleDescriptor.clear();
00501 descriptor.tupleDescriptor.push_back(attrDesc);
00502 descriptor.tupleDescriptor.push_back(attrDesc);
00503 descriptor.keyProjection.clear();
00504 descriptor.keyProjection.push_back(0);
00505
00506
00507 descriptor.tupleDescriptor.push_back(attrDesc);
00508 descriptor.keyProjection.push_back(1);
00509
00510 tupleAccessor.compute(descriptor.tupleDescriptor);
00511 recordBuf.reset(new FixedBuffer[tupleAccessor.getMaxByteCount()]);
00512
00513
00514
00515 descriptor.rootPageId = NULL_PAGE_ID;
00516 BTreeBuilder builder(descriptor,pRandomSegment);
00517
00518 tupleData.compute(descriptor.tupleDescriptor);
00519
00520 builder.createEmptyRoot();
00521 descriptor.rootPageId = builder.getRootPageId();
00522
00523 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment(
00524 pCache,
00525 1);
00526
00527 BTreeWriter writer(descriptor,scratchAccessor,false);
00528
00529 for (uint i = 0; i < nKey1; i++) {
00530 for (uint j = 0; j < nKey2; j++) {
00531 record.key = i;
00532 record.secondKey = j;
00533 record.value = i + j;
00534 marshalMultiKeyRecord();
00535 writer.insertTupleFromBuffer(recordBuf.get(), DUP_FAIL);
00536 }
00537 }
00538
00539
00540 TupleDescriptor searchKeyDesc;
00541 searchKeyDesc.push_back(attrDesc);
00542 keyData.compute(searchKeyDesc);
00543 keyData[0].pData = reinterpret_cast<PConstBuffer>(&record.key);
00544
00545
00546 BTreeReader reader(descriptor);
00547 for (uint i = 0; i < nKey1; ++i) {
00548 record.key = i;
00549
00550 if (!reader.searchForKey(keyData,DUP_SEEK_BEGIN)) {
00551 BOOST_FAIL("Could not find begin key #" << i);
00552 }
00553 reader.getTupleAccessorForRead().unmarshal(tupleData);
00554 BOOST_CHECK_EQUAL(i,readKey());
00555 BOOST_CHECK_EQUAL(0,readSecondKey());
00556 BOOST_CHECK_EQUAL(i,readMultiKeyValue());
00557
00558
00559
00560 reader.searchForKey(keyData,DUP_SEEK_END);
00561
00562 if (i == nKey1 - 1) {
00563 if (!reader.isSingular()) {
00564 BOOST_FAIL(
00565 "Should have reached end of tree for end key #" << i);
00566 }
00567 } else {
00568 reader.getTupleAccessorForRead().unmarshal(tupleData);
00569 BOOST_CHECK_EQUAL(i + 1,readKey());
00570 BOOST_CHECK_EQUAL(0,readSecondKey());
00571 BOOST_CHECK_EQUAL(i + 1,readMultiKeyValue());
00572 }
00573 }
00574 reader.endSearch();
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587 TupleData multiKeyData;
00588 multiKeyData.compute(builder.getKeyDescriptor());
00589 multiKeyData[0].pData = reinterpret_cast<PConstBuffer>(&record.key);
00590 multiKeyData[1].pData = reinterpret_cast<PConstBuffer>(&record.secondKey);
00591 for (uint i = 0; i < nKey1; i++) {
00592 for (uint j = 0; j < nKey2; j++) {
00593 record.key = i;
00594 record.secondKey = j;
00595 if (!writer.searchForKey(multiKeyData,DUP_SEEK_ANY)) {
00596 BOOST_FAIL("Could not find key #" << i);
00597 }
00598 writer.deleteCurrent();
00599
00600 record.secondKey = j + nKey2;
00601 record.value = i + j + nKey2;
00602 marshalMultiKeyRecord();
00603 writer.insertTupleFromBuffer(recordBuf.get(), DUP_FAIL);
00604 }
00605 }
00606 writer.endSearch();
00607 for (uint i = 0; i < nKey1; i++) {
00608 record.key = i;
00609 if (!reader.searchForKey(keyData,DUP_SEEK_BEGIN)) {
00610 BOOST_FAIL("Could not find begin key #" << i);
00611 }
00612 reader.getTupleAccessorForRead().unmarshal(tupleData);
00613 BOOST_CHECK_EQUAL(i,readKey());
00614 BOOST_CHECK_EQUAL(nKey2,readSecondKey());
00615 BOOST_CHECK_EQUAL(i + nKey2,readMultiKeyValue());
00616
00617 reader.searchForKey(keyData,DUP_SEEK_END);
00618
00619 if (i == nKey1 - 1) {
00620 if (!reader.isSingular()) {
00621 BOOST_FAIL(
00622 "Should have reached end of tree for end key #" << i);
00623 }
00624 } else {
00625 reader.getTupleAccessorForRead().unmarshal(tupleData);
00626 BOOST_CHECK_EQUAL(i + 1,readKey());
00627 BOOST_CHECK_EQUAL(nKey2,readSecondKey());
00628 BOOST_CHECK_EQUAL(i + 1 + nKey2,readMultiKeyValue());
00629 }
00630
00631 record.secondKey = 0;
00632 reader.searchForKey(multiKeyData,DUP_SEEK_END);
00633
00634 reader.getTupleAccessorForRead().unmarshal(tupleData);
00635 BOOST_CHECK_EQUAL(i,readKey());
00636 BOOST_CHECK_EQUAL(nKey2,readSecondKey());
00637 BOOST_CHECK_EQUAL(i + nKey2,readMultiKeyValue());
00638 }
00639 }
00640
00641 FENNEL_UNIT_TEST_SUITE(BTreeTest);
00642
00643