00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "fennel/common/CommonPreamble.h"
00025 #include "fennel/btree/BTreeBuilder.h"
00026 #include "fennel/btree/BTreeBuildLevel.h"
00027 #include "fennel/btree/BTreeAccessBaseImpl.h"
00028 #include "fennel/btree/BTreeReader.h"
00029 #include "fennel/segment/SegInputStream.h"
00030 #include "fennel/segment/SegOutputStream.h"
00031
00032 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/btree/BTreeBuilder.cpp#12 $");
00033
00034 BTreeBuilder::BTreeBuilder(
00035 BTreeDescriptor const &descriptor,
00036 SharedSegment pTempSegmentInit)
00037 : BTreeAccessBase(descriptor)
00038 {
00039 pTempSegment = pTempSegmentInit;
00040 }
00041
00042 BTreeBuilder::~BTreeBuilder()
00043 {
00044 }
00045
00046 uint BTreeBuilder::calculateNodesOnLevel(
00047 uint nEntries,uint nEntriesPerNode)
00048 {
00049 uint nNodes = nEntries / nEntriesPerNode;
00050 if (nEntries % nEntriesPerNode) {
00051
00052 ++nNodes;
00053 }
00054 return nNodes;
00055 }
00056
00057 void BTreeBuilder::build(
00058 ByteInputStream &sortedInputStream,
00059 RecordNum nEntriesTotal,
00060 double fillFactor)
00061 {
00062 assert(fillFactor <= 1.0);
00063 assert(fillFactor > 0);
00064 try {
00065 if (pLeafNodeAccessor->hasFixedWidthEntries()
00066 && pNonLeafNodeAccessor->hasFixedWidthEntries())
00067 {
00068 buildBalanced(sortedInputStream,0,nEntriesTotal,fillFactor);
00069 } else if (pNonLeafNodeAccessor->hasFixedWidthEntries()) {
00070 buildTwoPass(
00071 sortedInputStream,nEntriesTotal,fillFactor);
00072 } else {
00073 assert(!pLeafNodeAccessor->hasFixedWidthEntries());
00074 buildUnbalanced(sortedInputStream,nEntriesTotal,fillFactor);
00075 }
00076 } catch (...) {
00077 try {
00078 levels.clear();
00079 } catch (...) {
00080
00081 }
00082 throw;
00083 }
00084 levels.clear();
00085 }
00086
00087 void BTreeBuilder::buildTwoPass(
00088 ByteInputStream &sortedInputStream,
00089 RecordNum nEntriesTotal,
00090 double fillFactor)
00091 {
00092
00093 uint cbReserved = getSegment()->getUsablePageSize() - sizeof(BTreeNode);
00094 cbReserved = uint(cbReserved*(1-fillFactor));
00095
00096 levels.resize(1);
00097 BTreeNodeAccessor &nodeAccessor = *pLeafNodeAccessor;
00098 assert(!nodeAccessor.hasFixedWidthEntries());
00099 VariableBuildLevel *pLeafLevel = new VariableBuildLevel(*this,nodeAccessor);
00100 levels[0].reset(pLeafLevel);
00101
00102 BTreeBuildLevel &level = getLevel(0);
00103 level.nEntriesTotal = nEntriesTotal;
00104 level.cbReserved = cbReserved;
00105
00106 level.allocatePage();
00107
00108
00109
00110 level.processInput(sortedInputStream);
00111
00112 level.indexLastKey(true);
00113
00114
00115
00116 RecordNum nEntriesParent = level.iNode + 1;
00117
00118 if (nEntriesParent == 1) {
00119
00120 swapRoot();
00121 return;
00122 }
00123
00124
00125
00126 SharedSegInputStream pParentInputStream =
00127 pLeafLevel->getParentKeyStream();
00128 assert(pParentInputStream);
00129
00130 buildBalanced(*pParentInputStream,1,nEntriesParent,fillFactor);
00131 }
00132
00133 void BTreeBuilder::swapRoot()
00134 {
00135 BTreeBuildLevel &rootLevel = getLevel(getRootHeight());
00136 if (getRootPageId() == NULL_PAGE_ID) {
00137
00138 treeDescriptor.rootPageId = rootLevel.pageId;
00139 return;
00140 }
00141
00142
00143
00144 BTreePageLock reusedRootPageLock(treeDescriptor.segmentAccessor);
00145 reusedRootPageLock.lockExclusive(getRootPageId());
00146 BTreeNode &reusedRoot = reusedRootPageLock.getNodeForWrite();
00147 assert(!reusedRoot.nEntries);
00148 reusedRootPageLock.swapBuffers(rootLevel.pageLock);
00149
00150
00151 rootLevel.pageLock.deallocateLockedPage();
00152 }
00153
00154 void BTreeBuilder::buildUnbalanced(
00155 ByteInputStream &sortedInputStream,
00156 RecordNum nEntriesTotal,
00157 double fillFactor)
00158 {
00159
00160
00161 uint cbReserved = getSegment()->getUsablePageSize() - sizeof(BTreeNode);
00162 cbReserved = uint(cbReserved*(1-fillFactor));
00163
00164
00165 growTree();
00166 BTreeBuildLevel &level = getLevel(0);
00167 level.cbReserved = cbReserved;
00168 level.nEntriesTotal = nEntriesTotal;
00169 level.processInput(sortedInputStream);
00170
00171
00172
00173
00174 for (uint i = 0; i < levels.size(); ++i) {
00175 BTreeBuildLevel &level = getLevel(i);
00176 level.indexLastKey(true);
00177 }
00178
00179 swapRoot();
00180 }
00181
00182 void BTreeBuilder::buildBalanced(
00183 ByteInputStream &sortedInputStream,
00184 uint iInputLevel,
00185 RecordNum nEntriesTotal,
00186 double fillFactor)
00187 {
00188
00189
00190
00191 uint nEntriesPerNonLeaf =
00192 getSegment()->getUsablePageSize() - sizeof(BTreeNode);
00193 nEntriesPerNonLeaf /=
00194 pNonLeafNodeAccessor->getEntryByteCount(
00195 pNonLeafNodeAccessor->tupleAccessor.getMaxByteCount());
00196
00197 uint nEntriesPerLeaf =
00198 getSegment()->getUsablePageSize() - sizeof(BTreeNode);
00199 nEntriesPerLeaf /=
00200 pLeafNodeAccessor->getEntryByteCount(
00201 pLeafNodeAccessor->tupleAccessor.getMaxByteCount());
00202
00203 nEntriesPerNonLeaf = uint(nEntriesPerNonLeaf*fillFactor);
00204 nEntriesPerLeaf = uint(nEntriesPerLeaf*fillFactor);
00205
00206 if (!nEntriesPerNonLeaf) {
00207 nEntriesPerNonLeaf = 1;
00208 }
00209 if (!nEntriesPerLeaf) {
00210 nEntriesPerLeaf = 1;
00211 }
00212
00213
00214
00215
00216
00217
00218 RecordNum nEntriesFull = iInputLevel ? nEntriesPerNonLeaf : nEntriesPerLeaf;
00219 uint nLevels = iInputLevel + 1;
00220 while (nEntriesFull < nEntriesTotal) {
00221 ++nLevels;
00222 nEntriesFull *= nEntriesPerNonLeaf;
00223 }
00224 levels.resize(nLevels);
00225
00226
00227
00228
00229
00230 RecordNum nEntriesInLevel = nEntriesTotal;
00231 for (uint i = iInputLevel; i < levels.size(); i++) {
00232 BTreeNodeAccessor &nodeAccessor =
00233 i ? *pNonLeafNodeAccessor : *pLeafNodeAccessor;
00234
00235 assert(nodeAccessor.hasFixedWidthEntries());
00236 levels[i].reset(new FixedBuildLevel(*this,nodeAccessor));
00237
00238 BTreeBuildLevel &level = getLevel(i);
00239 level.iLevel = i;
00240 level.nEntriesTotal = nEntriesInLevel;
00241
00242
00243 nEntriesInLevel = calculateNodesOnLevel(
00244 nEntriesInLevel,
00245 i ? nEntriesPerNonLeaf : nEntriesPerLeaf);
00246
00247 if (i == getRootHeight()) {
00248
00249
00250 level.nEntriesPerNode = level.nEntriesTotal;
00251 if (getRootPageId() == NULL_PAGE_ID) {
00252 level.allocatePage();
00253 treeDescriptor.rootPageId = level.pageId;
00254 } else {
00255
00256
00257 level.pageId = getRootPageId();
00258 level.pageLock.lockExclusive(level.pageId);
00259 BTreeNode &node = level.pageLock.getNodeForWrite();
00260 assert(!node.nEntries);
00261 level.nodeAccessor.clearNode(
00262 node,getSegment()->getUsablePageSize());
00263 node.height = i;
00264 }
00265 } else {
00266
00267 level.allocatePage();
00268 }
00269 if (i) {
00270
00271
00272 BTreeBuildLevel &childLevel = getLevel(i - 1);
00273 childLevel.nEntriesPerNode = calculateChildEntriesPerNode(
00274 level.nEntriesTotal,
00275 childLevel.nEntriesTotal,
00276 0);
00277 }
00278 }
00279
00280
00281
00282 assert(nEntriesInLevel == 1);
00283
00284
00285 getLevel(iInputLevel).processInput(sortedInputStream);
00286
00287
00288 for (uint i = iInputLevel; i < levels.size(); ++i) {
00289 BTreeBuildLevel &level = getLevel(i);
00290 level.indexLastKey(true);
00291 assert(level.isFinished());
00292 }
00293 }
00294
00295 uint BTreeBuilder::calculateChildEntriesPerNode(
00296 RecordNum parentLevelTotalEntries,
00297 RecordNum childLevelTotalEntries,
00298 RecordNum parentLevelProcessedEntries)
00299 {
00300
00301
00302
00303 double fanout =
00304 double(childLevelTotalEntries) / double(parentLevelTotalEntries);
00305
00306 uint childLevelNewTotal;
00307 if (parentLevelProcessedEntries == (parentLevelTotalEntries - 1)) {
00308
00309 childLevelNewTotal = childLevelTotalEntries;
00310 } else {
00311 childLevelNewTotal =
00312 uint(0.5 + fanout * (parentLevelProcessedEntries + 1));
00313 }
00314
00315
00316
00317 uint childLevelPrevTotal = uint(0.5 + fanout * parentLevelProcessedEntries);
00318
00319 uint n = childLevelNewTotal - childLevelPrevTotal;
00320 assert(n > 0);
00321 return n;
00322 }
00323
00324 void BTreeBuilder::createEmptyRoot()
00325 {
00326 assert(getRootPageId() == NULL_PAGE_ID);
00327 BTreePageLock pageLock;
00328 pageLock.accessSegment(treeDescriptor.segmentAccessor);
00329 treeDescriptor.rootPageId = pageLock.allocatePage(getPageOwnerId());
00330 BTreeNode &node = pageLock.getNodeForWrite();
00331 pLeafNodeAccessor->clearNode(
00332 node,getSegment()->getUsablePageSize());
00333 }
00334
00335 void BTreeBuilder::growTree()
00336 {
00337 BTreeNodeAccessor &nodeAccessor =
00338 levels.size() ? *pNonLeafNodeAccessor : *pLeafNodeAccessor;
00339 levels.resize(levels.size() + 1);
00340 levels[getRootHeight()].reset(new DynamicBuildLevel(*this,nodeAccessor));
00341 BTreeBuildLevel &level = *levels.back();
00342 level.iLevel = getRootHeight();
00343 if (level.iLevel) {
00344
00345 level.cbReserved = getLevel(level.iLevel - 1).cbReserved;
00346 }
00347 level.allocatePage();
00348 }
00349
00350
00351 void BTreeBuilder::truncate(
00352 bool rootless, TupleProjection const *pLeafPageIdProj)
00353 {
00354 if (pLeafPageIdProj) {
00355 truncateExternal(*pLeafPageIdProj);
00356 }
00357 BTreePageLock pageLock;
00358 pageLock.accessSegment(treeDescriptor.segmentAccessor);
00359
00360 pageLock.lockExclusive(getRootPageId());
00361
00362
00363 BTreeNode const &rootReadOnly = pageLock.getNodeForRead();
00364 if (!rootReadOnly.height) {
00365 if (rootless) {
00366 pageLock.deallocateLockedPage();
00367 treeDescriptor.rootPageId = NULL_PAGE_ID;
00368 return;
00369 }
00370 if (!rootReadOnly.nEntries) {
00371
00372 return;
00373 }
00374 }
00375
00376 BTreeNode &root = pageLock.getNodeForWrite();
00377 if (root.height) {
00378 truncateChildren(root);
00379 }
00380 if (rootless) {
00381 pageLock.deallocateLockedPage();
00382 treeDescriptor.rootPageId = NULL_PAGE_ID;
00383 } else {
00384 pLeafNodeAccessor->clearNode(
00385 root,getSegment()->getUsablePageSize());
00386 }
00387 }
00388
00389 void BTreeBuilder::truncateChildren(BTreeNode const &node)
00390 {
00391 assert(node.height);
00392 assert(node.nEntries);
00393 PageId pageId = getChild(node,0);
00394 BTreePageLock pageLock;
00395 pageLock.accessSegment(treeDescriptor.segmentAccessor);
00396 if (node.height > 1) {
00397 pageLock.lockExclusive(pageId);
00398 truncateChildren(pageLock.getNodeForRead());
00399 pageLock.unlock();
00400 }
00401 while (pageId != NULL_PAGE_ID) {
00402 PageId nextPageId = getRightSibling(pageId);
00403 pageLock.deallocateUnlockedPage(pageId);
00404 pageId = nextPageId;
00405 }
00406 }
00407
00408 void BTreeBuilder::truncateExternal(TupleProjection const &leafPageIdProj)
00409 {
00410
00411
00412
00413 BTreeReader reader(treeDescriptor);
00414 TupleProjectionAccessor projAccessor;
00415 projAccessor.bind(
00416 reader.getTupleAccessorForRead(),
00417 leafPageIdProj);
00418 TupleDescriptor projDesc;
00419 projDesc.projectFrom(treeDescriptor.tupleDescriptor, leafPageIdProj);
00420 TupleData projData(projDesc);
00421 BTreePageLock pageLock;
00422 pageLock.accessSegment(treeDescriptor.segmentAccessor);
00423 if (reader.searchFirst()) {
00424 do {
00425 projAccessor.unmarshal(projData);
00426 for (uint i = 0; i < projData.size(); ++i) {
00427 if (!projData[i].pData) {
00428 continue;
00429 }
00430 PageId pageId = *reinterpret_cast<PageId const *>(
00431 projData[i].pData);
00432 pageLock.deallocateUnlockedPage(pageId);
00433 }
00434 } while (reader.searchNext());
00435 }
00436 }
00437
00438 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/btree/BTreeBuilder.cpp#12 $");
00439
00440