VersionedRandomAllocationSegment.cpp

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/segment/VersionedRandomAllocationSegment.cpp#19 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 The Eigenbase Project
00005 // Copyright (C) 2005-2009 SQLstream, Inc.
00006 // Copyright (C) 2005-2009 LucidEra, Inc.
00007 // Portions Copyright (C) 1999-2009 John V. Sichi
00008 //
00009 // This program is free software; you can redistribute it and/or modify it
00010 // under the terms of the GNU General Public License as published by the Free
00011 // Software Foundation; either version 2 of the License, or (at your option)
00012 // any later version approved by The Eigenbase Project.
00013 //
00014 // This program is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 */
00023 
00024 #include "fennel/common/CommonPreamble.h"
00025 #include "fennel/common/AbortExcn.h"
00026 #include "fennel/common/FennelResource.h"
00027 #include "fennel/segment/RandomAllocationSegmentBaseImpl.h"
00028 #include "fennel/segment/VersionedRandomAllocationSegmentImpl.h"
00029 
00030 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/segment/VersionedRandomAllocationSegment.cpp#19 $");
00031 
00032 VersionedRandomAllocationSegment::VersionedRandomAllocationSegment(
00033     SharedSegment delegateSegment,
00034     SharedSegment pTempSegmentInit)
00035     : RandomAllocationSegmentBase(delegateSegment)
00036 {
00037     nPagesPerExtent =
00038         (getUsablePageSize() - sizeof(VersionedExtentAllocationNode))
00039         / sizeof(VersionedPageEntry);
00040 
00041     // + 1 is for SegAllocNode itself
00042     nPagesPerSegAlloc = nPagesPerExtent*nExtentsPerSegAlloc + 1;
00043 
00044     pTempSegment = pTempSegmentInit;
00045 }
00046 
00047 void VersionedRandomAllocationSegment::initForUse()
00048 {
00049     // Since we will be accessing SegmentAllocationNode pages, we need to
00050     // acquire a mutex on the allocationNodeMap.
00051     SXMutexSharedGuard mapGuard(mapMutex);
00052 
00053     RandomAllocationSegmentBase::initForUse();
00054 }
00055 
00056 void VersionedRandomAllocationSegment::formatPageExtents(
00057     SegmentAllocationNode &segAllocNode,
00058     ExtentNum &extentNum)
00059 {
00060     formatPageExtentsTemplate<
00061             VersionedExtentAllocationNode,
00062             VersionedExtentAllocLock,
00063             VersionedPageEntry>(
00064         segAllocNode,
00065         extentNum);
00066 }
00067 
00068 PageId VersionedRandomAllocationSegment::allocatePageId(PageOwnerId ownerId)
00069 {
00070     return allocatePageIdFromSegment(ownerId, pTempSegment);
00071 }
00072 
00073 PageId VersionedRandomAllocationSegment::getSegAllocPageIdForWrite(
00074     PageId origSegAllocPageId)
00075 {
00076     return getTempAllocNodePage<SegAllocLock>(origSegAllocPageId, true);
00077 }
00078 
00079 void VersionedRandomAllocationSegment::undoSegAllocPageWrite(
00080     PageId segAllocPageId)
00081 {
00082     SXMutexExclusiveGuard mapGuard(mapMutex);
00083 
00084     NodeMapConstIter iter = allocationNodeMap.find(segAllocPageId);
00085     assert(iter != allocationNodeMap.end());
00086     SharedModifiedAllocationNode pModAllocNode = iter->second;
00087     pModAllocNode->updateCount--;
00088 }
00089 
00090 PageId VersionedRandomAllocationSegment::getSegAllocPageIdForRead(
00091     PageId origSegAllocPageId,
00092     SharedSegment &allocNodeSegment)
00093 {
00094     return findAllocPageIdForRead(origSegAllocPageId, allocNodeSegment);
00095 }
00096 
00097 PageId VersionedRandomAllocationSegment::getExtAllocPageIdForRead(
00098     ExtentNum extentNum,
00099     SharedSegment &allocNodeSegment)
00100 {
00101     return
00102         findAllocPageIdForRead(
00103             getExtentAllocPageId(extentNum),
00104             allocNodeSegment);
00105 }
00106 
00107 PageId VersionedRandomAllocationSegment::findAllocPageIdForRead(
00108     PageId origAllocNodePageId,
00109     SharedSegment &allocNodeSegment)
00110 {
00111     // If the allocation node corresponding to the desired page has been
00112     // modified, it will be in our map.  If so, retrieve the pageId
00113     // corresponding to the modified allocation node, and access that
00114     // page from the temporary segment.  Otherwise, access the allocation
00115     // node from permanent storage.
00116 
00117     assert(mapMutex.isLocked(LOCKMODE_S));
00118     PageId tempAllocNodePageId;
00119     NodeMapConstIter iter = allocationNodeMap.find(origAllocNodePageId);
00120     if (iter == allocationNodeMap.end()) {
00121         tempAllocNodePageId = origAllocNodePageId;
00122         allocNodeSegment = getTracingSegment();
00123     } else {
00124         tempAllocNodePageId = iter->second->tempPageId;
00125         allocNodeSegment = pTempSegment;
00126     }
00127 
00128     return tempAllocNodePageId;
00129 }
00130 
00131 PageId VersionedRandomAllocationSegment::allocateFromNewExtent(
00132     ExtentNum extentNum,
00133     PageOwnerId ownerId)
00134 {
00135     return
00136         allocateFromNewExtentTemplate<
00137                 VersionedExtentAllocationNode,
00138                 VersionedExtentAllocLock,
00139                 VersionedPageEntry>(
00140             extentNum,
00141             ownerId,
00142             pTempSegment);
00143 }
00144 
00145 PageId VersionedRandomAllocationSegment::allocateFromExtent(
00146     ExtentNum extentNum,
00147     PageOwnerId ownerId)
00148 {
00149     return
00150         allocateFromExtentTemplate<
00151                 VersionedExtentAllocationNode,
00152                 VersionedExtentAllocLock,
00153                 VersionedPageEntry>(
00154             extentNum,
00155             ownerId,
00156             pTempSegment);
00157 }
00158 
00159 void VersionedRandomAllocationSegment::deallocatePageRange(
00160     PageId startPageId,
00161     PageId endPageId)
00162 {
00163     assert(startPageId == endPageId);
00164 
00165     if (startPageId == NULL_PAGE_ID) {
00166         format();
00167     } else {
00168         // Note that we cannot discard deferred-deallocation pages from cache
00169         // because they really haven't been freed yet and still may be
00170         // referenced by other threads.  The pages will be removed from the
00171         // cache when they are actually freed.
00172 
00173         // Acquire mutex exclusively to prevent another thread from trying
00174         // to do the actual free of the same page, if it's an old page.
00175         SXMutexExclusiveGuard deallocationGuard(deallocationMutex);
00176 
00177         // Simply mark the page as deallocation-deferred.  The actual
00178         // deallocation will be done by calls to deallocateOldPages().
00179         deferDeallocation(startPageId);
00180     }
00181 }
00182 
// Marks a single allocated page as deallocation-deferred by stamping its
// ownerId with a placeholder deallocation txnId.  The page is not freed
// here; deallocateOldPages() performs the actual free later.
void VersionedRandomAllocationSegment::deferDeallocation(PageId pageId)
{
    ExtentNum extentNum;
    BlockNum iPageInExtent;
    uint iSegAlloc;
    splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
    // Entry 0 is the extent header page, which is never deallocated.
    assert(iPageInExtent);

    // Update the page entry in the temp copy of its extent allocation node,
    // obtained (or created) via getExtAllocPageIdForWrite.
    SegmentAccessor segAccessor(pTempSegment, pCache);
    VersionedExtentAllocLock extentAllocLock(segAccessor);
    extentAllocLock.lockExclusive(getExtAllocPageIdForWrite(extentNum));
    VersionedExtentAllocationNode &extentNode =
        extentAllocLock.getNodeForWrite();
    VersionedPageEntry &pageEntry = extentNode.getPageEntry(iPageInExtent);
    // The page must currently be allocated and not already marked
    // deallocation-deferred.
    assert(
        pageEntry.ownerId != UNALLOCATED_PAGE_OWNER_ID &&
        !isDeallocatedPageOwnerId(pageEntry.ownerId));
    // Set the deallocation txnId to an arbitrary value, for now.  It will
    // get overwritten with a real txnId at commit time.
    pageEntry.ownerId = makeDeallocatedPageOwnerId(TxnId(0));
}
00204 
00205 void VersionedRandomAllocationSegment::freePageEntry(
00206     ExtentNum extentNum,
00207     BlockNum iPageInExtent)
00208 {
00209     freePageEntryTemplate<
00210             VersionedExtentAllocationNode,
00211             VersionedExtentAllocLock,
00212             VersionedPageEntry>(
00213         extentNum,
00214         iPageInExtent);
00215 }
00216 
00217 void VersionedRandomAllocationSegment::markPageEntryUnused(
00218     PageEntry &pageEntry)
00219 {
00220     RandomAllocationSegmentBase::markPageEntryUnused(pageEntry);
00221 
00222     VersionedPageEntry *pVersionedPageEntry =
00223         reinterpret_cast<VersionedPageEntry *>(&pageEntry);
00224     pVersionedPageEntry->versionChainPageId = NULL_PAGE_ID;
00225     pVersionedPageEntry->allocationCsn = NULL_TXN_ID;
00226 }
00227 
00228 PageId VersionedRandomAllocationSegment::getPageSuccessor(PageId pageId)
00229 {
00230     VersionedPageEntry pageEntry;
00231 
00232     getLatestPageEntryCopy(pageId, pageEntry);
00233     return pageEntry.successorId;
00234 }
00235 
00236 void VersionedRandomAllocationSegment::setPageSuccessor(
00237     PageId pageId, PageId successorId)
00238 {
00239     setPageSuccessorTemplate<
00240             VersionedExtentAllocationNode,
00241             VersionedExtentAllocLock>(
00242         pageId,
00243         successorId,
00244         pTempSegment);
00245 }
00246 
00247 bool VersionedRandomAllocationSegment::isPageIdAllocated(PageId pageId)
00248 {
00249     return testPageId(pageId, true, false);
00250 }
00251 
00252 bool VersionedRandomAllocationSegment::isPageIdAllocateCommitted(PageId pageId)
00253 {
00254     return testPageId(pageId, true, true);
00255 }
00256 
00257 bool VersionedRandomAllocationSegment::isPageIdValid(PageId pageId)
00258 {
00259     return testPageId(pageId, false, false);
00260 }
00261 
00262 PageId VersionedRandomAllocationSegment::getExtAllocPageIdForWrite(
00263     ExtentNum extentNum)
00264 {
00265     return
00266         getTempAllocNodePage<VersionedExtentAllocLock>(
00267             getExtentAllocPageId(extentNum),
00268             false);
00269 }
00270 
00271 PageOwnerId VersionedRandomAllocationSegment::getPageOwnerId(
00272     PageId pageId,
00273     bool thisSegment)
00274 {
00275     return getPageOwnerIdTemplate<VersionedPageEntry>(pageId, thisSegment);
00276 }
00277 
// Retrieves a copy of the page entry for pageId into pageEntryCopy, which
// must actually be a VersionedPageEntry (downcast below).  isAllocated and
// thisSegment are forwarded to the base-class template.
void VersionedRandomAllocationSegment::getPageEntryCopy(
    PageId pageId,
    PageEntry &pageEntryCopy,
    bool isAllocated,
    bool thisSegment)
{
    // We need to get a copy of the page entry rather than a reference
    // because the page entry may originate from a temporary page, which
    // can be freed by another thread.  By holding the mutex on the
    // allocationNodeMap while we're retrieving the copy, we're ensured that
    // the page cannot be freed until we exit this method.
    SXMutexSharedGuard mapGuard(mapMutex);

    VersionedPageEntry *pVersionedPageEntry =
        static_cast<VersionedPageEntry *>(&pageEntryCopy);
    getPageEntryCopyTemplate<
            VersionedExtentAllocationNode,
            VersionedExtentAllocLock,
            VersionedPageEntry>(
        pageId,
        *pVersionedPageEntry,
        isAllocated,
        thisSegment);
}
00302 
00303 void VersionedRandomAllocationSegment::getLatestPageEntryCopy(
00304     PageId pageId,
00305     VersionedPageEntry &pageEntryCopy)
00306 {
00307     getPageEntryCopy(pageId, pageEntryCopy, true, false);
00308 }
00309 
00310 void VersionedRandomAllocationSegment::getCommittedPageEntryCopy(
00311     PageId pageId,
00312     VersionedPageEntry &pageEntryCopy)
00313 {
00314     getPageEntryCopy(pageId, pageEntryCopy, true, true);
00315 }
00316 
// Initializes the versioning fields of an allocated page's entry in the
// temp copy of its extent allocation node: sets the version chain link,
// and the allocation csn when a real one is supplied.
void VersionedRandomAllocationSegment::initPageEntry(
    PageId pageId,
    PageId versionChainId,
    TxnId allocationCsn)
{
    assert(isPageIdAllocated(pageId));

    ExtentNum extentNum;
    BlockNum iPageInExtent;
    uint iSegAlloc;
    splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
    // Entry 0 is the extent header page, which is never a data page.
    assert(iPageInExtent);

    // Lock the temp copy of the extent allocation node for update.
    SegmentAccessor segAccessor(pTempSegment, pCache);
    VersionedExtentAllocLock extentAllocLock(segAccessor);
    extentAllocLock.lockExclusive(getExtAllocPageIdForWrite(extentNum));
    VersionedExtentAllocationNode &extentNode =
        extentAllocLock.getNodeForWrite();
    VersionedPageEntry &pageEntry =
        extentNode.getPageEntry(iPageInExtent);
    assert(pageEntry.ownerId != UNALLOCATED_PAGE_OWNER_ID);
    pageEntry.versionChainPageId = versionChainId;
    // Preserve the existing csn when the caller passes NULL_TXN_ID.
    if (allocationCsn != NULL_TXN_ID) {
        pageEntry.allocationCsn = allocationCsn;
    }
}
00343 
00344 void VersionedRandomAllocationSegment::chainPageEntries(
00345     PageId pageId,
00346     PageId versionChainId,
00347     PageId successorId)
00348 {
00349     chainPageEntries(pageId, versionChainId, successorId, false);
00350 }
00351 
// Links pageId into a version chain (and optionally sets its successor).
// When thisSegment is true the permanent extent allocation node is updated
// directly; otherwise the temp copy is updated.
void VersionedRandomAllocationSegment::chainPageEntries(
    PageId pageId,
    PageId versionChainId,
    PageId successorId,
    bool thisSegment)
{
    assert(isPageIdAllocated(pageId));

    ExtentNum extentNum;
    BlockNum iPageInExtent;
    uint iSegAlloc;
    splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
    // Entry 0 is the extent header page, which is never a data page.
    assert(iPageInExtent);

    // Update the pageEntry either in the permanent or temp segment
    // depending on the "thisSegment" parameter passed in
    SharedSegment allocNodeSegment =
        (thisSegment) ? getTracingSegment() : pTempSegment;
    SegmentAccessor segAccessor(allocNodeSegment, pCache);
    VersionedExtentAllocLock extentAllocLock(segAccessor);
    PageId extentPageId =
        (thisSegment) ?
            getExtentAllocPageId(extentNum) :
            getExtAllocPageIdForWrite(extentNum);

    extentAllocLock.lockExclusive(extentPageId);
    VersionedExtentAllocationNode &extentNode =
        extentAllocLock.getNodeForWrite();
    VersionedPageEntry &pageEntry =
        extentNode.getPageEntry(iPageInExtent);
    // NULL_PAGE_ID means "leave the successor unchanged".
    if (successorId != NULL_PAGE_ID) {
        pageEntry.successorId = successorId;
    }
    // The chain link, by contrast, must always be supplied.
    assert(versionChainId != NULL_PAGE_ID);
    pageEntry.versionChainPageId = versionChainId;
}
00388 
// Applies a transaction's accumulated page-entry modifications to the
// allocation nodes.  On commit, temp-node contents are copied into the
// permanent nodes; on rollback, the temp copies are reverted from the
// permanent ones.  Afterwards, temp extent-node pages with no remaining
// uncommitted updates are freed.
void VersionedRandomAllocationSegment::updateAllocNodes(
    ModifiedPageEntryMap const &modifiedPageEntryMap,
    TxnId commitCsn,
    bool commit,
    SharedSegment pOrigSegment)
{
    SXMutexExclusiveGuard mapGuard(mapMutex);

    for (ModifiedPageEntryMapIter iter = modifiedPageEntryMap.begin();
        iter != modifiedPageEntryMap.end();
        iter++)
    {
        PageId pageId = iter->first;
        SharedModifiedPageEntry pModEntry = iter->second;

        assert(isPageIdAllocated(pageId));

        ExtentNum extentNum;
        BlockNum iPageInExtent;
        uint iSegAlloc;
        splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
        assert(iPageInExtent);

        // First make sure the page/extent entry we're going to be updating
        // has been allocated
        allocateAllocNodes(iSegAlloc, NULL_PAGE_ID, extentNum);

        // No need to order updates to the extent and page entries because no
        // other thread should be modifying the permanent allocation node
        // pages at the same time.
        // Only allocations/deallocations change the extent's free count.
        if ((pModEntry->lastModType == ModifiedPageEntry::ALLOCATED) ||
           (pModEntry->lastModType == ModifiedPageEntry::DEALLOCATED))
        {
            updateExtentEntry(
                iSegAlloc,
                extentNum,
                pModEntry->allocationCount,
                commit);
        }
        updatePageEntry(
            pageId,
            extentNum,
            iPageInExtent,
            pModEntry,
            commitCsn,
            commit,
            pOrigSegment);
    }

    // Deallocate any temp allocation node pages corresponding to extent
    // allocation nodes that no longer contain any uncommitted updates.
    // Segment allocation nodes are not deallocated because those nodes can
    // be accessed frequently, especially if all extents on the page are full.
    ModifiedAllocationNodeMap::iterator iter = allocationNodeMap.begin();
    while (iter != allocationNodeMap.end()) {
        SharedModifiedAllocationNode pModNode = iter->second;
        if (pModNode->updateCount == 0 && !pModNode->isSegAllocNode) {
            // Advance the iterator before freeTempPage erases this entry.
            PageId pageId = iter->first;
            iter++;
            freeTempPage(pageId, pModNode->tempPageId);
        } else {
            iter++;
        }
    }
}
00454 
// Propagates one page entry's modification.  On commit, the temp copy of
// the entry is copied into the permanent extent node; on rollback, the
// temp copy is restored from the permanent one, and a newly allocated page
// is discarded from the cache.  Decrements the temp node's pending-update
// count by the number of updates this entry contributed.
void VersionedRandomAllocationSegment::updatePageEntry(
    PageId pageId,
    ExtentNum extentNum,
    uint iPageInExtent,
    SharedModifiedPageEntry pModEntry,
    TxnId commitCsn,
    bool commit,
    SharedSegment pOrigSegment)
{
    assert(mapMutex.isLocked(LOCKMODE_X));

    // Update the extent allocation page, copying the contents from the
    // temporary page in the case of a commit and vice versa for a rollback.

    PageId extentPageId = getExtentAllocPageId(extentNum);
    NodeMapConstIter iter = allocationNodeMap.find(extentPageId);
    assert(iter != allocationNodeMap.end());
    SharedModifiedAllocationNode pModNode = iter->second;

    if (commit) {
        copyPageEntryFromTemp(
            pageId,
            extentPageId,
            pModNode->tempPageId,
            iPageInExtent,
            pModEntry->lastModType,
            commitCsn,
            pModEntry->ownerId);
    } else {
        // In the case of a rollback of a newly allocated page, remove the
        // page from the cache
        if (pModEntry->lastModType == ModifiedPageEntry::ALLOCATED) {
            pCache->discardPage(pOrigSegment->translatePageId(pageId));
        }

        copyPageEntryToTemp(extentPageId, pModNode->tempPageId, iPageInExtent);
    }

    pModNode->updateCount -= pModEntry->updateCount;
}
00495 
// Commit path: copies one page entry from the temp extent node into the
// permanent one at origPageId.  For new allocations and deferred
// deallocations, first stamps the temp entry with the commit csn/ownerId
// so the (still live) temp copy also reflects the committed state.
void VersionedRandomAllocationSegment::copyPageEntryFromTemp(
    PageId pageId,
    PageId origPageId,
    PageId tempPageId,
    BlockNum iPageInExtent,
    ModifiedPageEntry::ModType lastModType,
    TxnId commitCsn,
    PageOwnerId ownerId)
{
    // Lock the temp node first for write...
    SegmentAccessor tempAccessor(pTempSegment, pCache);
    VersionedExtentAllocLock tempAllocLock(tempAccessor);
    tempAllocLock.lockExclusive(tempPageId);
    VersionedExtentAllocationNode &tempExtentNode =
        tempAllocLock.getNodeForWrite();
    VersionedPageEntry &tempPageEntry =
        tempExtentNode.getPageEntry(iPageInExtent);

    // ...then the permanent node.
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    VersionedExtentAllocLock extentAllocLock(selfAccessor);
    extentAllocLock.lockExclusive(origPageId);
    VersionedExtentAllocationNode &extentNode =
        extentAllocLock.getNodeForWrite();
    VersionedPageEntry &pageEntry =
        extentNode.getPageEntry(iPageInExtent);

    // Update the temp page entry's csn and ownerId if this is a new page
    // allocation.  We need to update the temp entry because we may still
    // need to use that temp page.
    if (lastModType == ModifiedPageEntry::ALLOCATED) {
        assert(tempPageEntry.ownerId == UNCOMMITTED_PAGE_OWNER_ID);
        tempPageEntry.allocationCsn = commitCsn;
        tempPageEntry.ownerId = ownerId;
    } else if (lastModType == ModifiedPageEntry::DEALLOCATED) {
        // Replace the placeholder deallocation txnId (set by
        // deferDeallocation) with the real commit csn.
        assert(isDeallocatedPageOwnerId(tempPageEntry.ownerId));
        tempPageEntry.ownerId = makeDeallocatedPageOwnerId(commitCsn);
    }
    // Publish the (possibly updated) temp entry to permanent storage.
    pageEntry = tempPageEntry;
}
00534 
// Rollback path: restores one page entry in the temp extent node from the
// committed entry in the permanent node at origPageId.
void VersionedRandomAllocationSegment::copyPageEntryToTemp(
    PageId origPageId,
    PageId tempPageId,
    BlockNum iPageInExtent)
{
    // Lock the temp node for write...
    SegmentAccessor tempAccessor(pTempSegment, pCache);
    VersionedExtentAllocLock tempAllocLock(tempAccessor);
    tempAllocLock.lockExclusive(tempPageId);
    VersionedExtentAllocationNode &tempExtentNode =
        tempAllocLock.getNodeForWrite();
    VersionedPageEntry &tempPageEntry =
        tempExtentNode.getPageEntry(iPageInExtent);

    // ...and the permanent node for read only.
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    VersionedExtentAllocLock extentAllocLock(selfAccessor);
    extentAllocLock.lockShared(origPageId);
    VersionedExtentAllocationNode const &extentNode =
        extentAllocLock.getNodeForRead();
    VersionedPageEntry const &pageEntry =
        extentNode.getPageEntry(iPageInExtent);

    tempPageEntry = pageEntry;
}
00558 
// Adjusts the extent's unallocated-page count in the SegmentAllocationNode
// for net page allocations/deallocations.  On commit the permanent node is
// updated; on rollback the temp copy is reverted.  Also decrements the
// temp node's pending-update count.
void VersionedRandomAllocationSegment::updateExtentEntry(
    uint iSegAlloc,
    ExtentNum extentNum,
    uint allocationCount,
    bool commit)
{
    assert(mapMutex.isLocked(LOCKMODE_X));

    // If the page was newly allocated, we need to update the
    // SegmentAllocationNode

    if (allocationCount) {
        // Update the permanent page if we're committing.  Otherwise, update
        // the temporary page, reverting the allocations/deallocations.
        PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
        NodeMapConstIter iter = allocationNodeMap.find(segAllocPageId);
        assert(iter != allocationNodeMap.end());
        SharedModifiedAllocationNode pModNode = iter->second;
        SharedSegment allocNodeSegment;
        PageId segPageId;
        if (commit) {
            allocNodeSegment = getTracingSegment();
            segPageId = segAllocPageId;
        } else {
            allocNodeSegment = pTempSegment;
            segPageId = pModNode->tempPageId;
        }

        SegmentAccessor segAccessor(allocNodeSegment, pCache);
        SegAllocLock segAllocLock(segAccessor);
        segAllocLock.lockExclusive(segPageId);
        SegmentAllocationNode &segAllocNode = segAllocLock.getNodeForWrite();

        // Extent entries within a node are indexed relative to the node.
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;
        SegmentAllocationNode::ExtentEntry &extentEntry =
            segAllocNode.getExtentEntry(relativeExtentNum);
        if (commit) {
            extentEntry.nUnallocatedPages -= allocationCount;
        } else {
            extentEntry.nUnallocatedPages += allocationCount;
        }

        pModNode->updateCount -= allocationCount;
    }
}
00604 
// Ensures the SegmentAllocationNode for iSegAlloc exists (allocating it
// and, recursively, any missing predecessors so the nextSegAllocPageId
// chain stays linked), then ensures the extent allocation nodes needed for
// extentNum exist.  nextPageId is NULL_PAGE_ID on the initial call; on
// recursive calls it is the successor page to link to.
void VersionedRandomAllocationSegment::allocateAllocNodes(
    uint iSegAlloc,
    PageId nextPageId,
    ExtentNum extentNum)
{
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    SegAllocLock segAllocLock(selfAccessor);
    PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
    segAllocLock.lockExclusive(segAllocPageId);
    // A valid magic number means the node page has already been formatted.
    if (segAllocLock.checkMagicNumber()) {
        // If the SegmentAllocationNode has already been allocated and this
        // is the first call to this method, check if we need to allocate
        // VersionedExtentAllocationNodes.  Otherwise, set the
        // nextSegAllocPageId.

        SegmentAllocationNode &node = segAllocLock.getNodeForWrite();
        if (nextPageId == NULL_PAGE_ID) {
            allocateExtAllocNodes(node, iSegAlloc, extentNum);
        } else {
            node.nextSegAllocPageId = nextPageId;
        }
    } else {
        // Allocate a new page and then recursively call this method to set
        // the nextSegAllocPageId on the predecessor SegmentAllocationNode
        // to the newly allocated page, allocating that SegmentAllocationNode
        // if it also hasn't been allocated.  If this is the first call to this
        // method, check if we need to allocate
        // VersionedExtentAllocationNodes.

        // Node 0 is created at format time, so an unformatted node implies
        // a predecessor exists.
        permAssert(iSegAlloc >= 1);
        segAllocLock.setMagicNumber();
        SegmentAllocationNode &newNode = segAllocLock.getNodeForWrite();
        // -1 for the extent allocation node itself
        newNode.nPagesPerExtent = nPagesPerExtent - 1;
        newNode.nExtents = 0;
        newNode.nextSegAllocPageId = nextPageId;
        allocateAllocNodes(iSegAlloc - 1, segAllocPageId, extentNum);
        if (nextPageId == NULL_PAGE_ID) {
            allocateExtAllocNodes(newNode, iSegAlloc, extentNum);
        }
    }
}
00647 
// Formats any extent allocation nodes between the node's current extent
// count and the extent required for extentNum, updating nExtents.
void VersionedRandomAllocationSegment::allocateExtAllocNodes(
    SegmentAllocationNode &segAllocNode,
    uint iSegAlloc,
    ExtentNum extentNum)
{
    // Allocate new extents if the one we're going to be updating hasn't
    // been allocated yet.  Turn off page mapping so the updates will
    // be made on the permanent pages.
    ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;
    if (segAllocNode.nExtents < relativeExtentNum + 1) {
        // Absolute extent number of the first missing extent.
        ExtentNum startExtentNum =
            segAllocNode.nExtents + nExtentsPerSegAlloc * iSegAlloc;
        segAllocNode.nExtents = relativeExtentNum + 1;
        formatPageExtentsTemplate<
                VersionedExtentAllocationNode,
                VersionedExtentAllocLock,
                VersionedPageEntry>(
            segAllocNode,
            startExtentNum);
    }
}
00669 
00670 bool VersionedRandomAllocationSegment::validateFreePageCount(PageId pageId)
00671 {
00672     ExtentNum extentNum;
00673     BlockNum iPageInExtent;
00674     uint iSegAlloc;
00675     splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
00676     assert(iPageInExtent);
00677 
00678     PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
00679     SegmentAccessor selfAccessor(getTracingSegment(), pCache);
00680     SegAllocLock segAllocLock(selfAccessor);
00681     segAllocLock.lockShared(segAllocPageId);
00682     SegmentAllocationNode const &segAllocNode = segAllocLock.getNodeForRead();
00683 
00684     ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;
00685     SegmentAllocationNode::ExtentEntry const &extentEntry =
00686         segAllocNode.getExtentEntry(relativeExtentNum);
00687 
00688     VersionedExtentAllocLock extentAllocLock(selfAccessor);
00689     PageId extentPageId = getExtentAllocPageId(extentNum);
00690     extentAllocLock.lockExclusive(extentPageId);
00691     VersionedExtentAllocationNode const &extentNode =
00692         extentAllocLock.getNodeForRead();
00693     uint freePageCount = 0;
00694     for (uint i = 0; i < nPagesPerExtent; i++) {
00695         VersionedPageEntry const &pageEntry = extentNode.getPageEntry(i);
00696         if (pageEntry.ownerId == UNALLOCATED_PAGE_OWNER_ID) {
00697             freePageCount++;
00698         }
00699     }
00700 
00701     bool rc = (freePageCount == extentEntry.nUnallocatedPages);
00702     return rc;
00703 }
00704 
00705 void VersionedRandomAllocationSegment::freeTempPage(
00706     PageId origAllocNodePageId,
00707     PageId tempAllocNodePageId)
00708 {
00709     assert(mapMutex.isLocked(LOCKMODE_X));
00710     pTempSegment->deallocatePageRange(tempAllocNodePageId, tempAllocNodePageId);
00711     allocationNodeMap.erase(origAllocNodePageId);
00712 }
00713 
// Collects up to numPages "old" pageIds into oldPageSet, scanning from the
// position given by iSegAlloc/extentNum.  A page is old if it is in a
// version chain with an allocation csn preceding oldestActiveTxnId, or is
// deallocation-deferred with a deallocation txnId preceding it.  Returns
// false once the last SegmentAllocationNode has been scanned, true
// otherwise; iSegAlloc/extentNum carry the resume position across calls.
bool VersionedRandomAllocationSegment::getOldPageIds(
    uint &iSegAlloc,
    ExtentNum &extentNum,
    TxnId oldestActiveTxnId,
    uint numPages,
    PageSet &oldPageSet)
{
    uint numOldPages = 0;
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    SegAllocLock segAllocLock(selfAccessor);

    while (numOldPages < numPages) {
        PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
        segAllocLock.lockShared(segAllocPageId);
        SegmentAllocationNode const &segAllocNode =
            segAllocLock.getNodeForRead();
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;

        for (uint i = relativeExtentNum; i < segAllocNode.nExtents;
            i++, extentNum++)
        {
            if (numOldPages >= numPages) {
                // Wait until we've reached an extent boundary before
                // returning
                return true;
            }

            SegmentAllocationNode::ExtentEntry const &extentEntry =
                segAllocNode.getExtentEntry(i);
            // Skip extents with no allocated data pages (the -1 excludes
            // the extent header page from the count).
            if (extentEntry.nUnallocatedPages == nPagesPerExtent - 1) {
                continue;
            }

            VersionedExtentAllocLock extentAllocLock(selfAccessor);
            extentAllocLock.lockShared(getExtentAllocPageId(extentNum));
            VersionedExtentAllocationNode const &extentNode =
                extentAllocLock.getNodeForRead();

            // Start at pageEntry 1 to skip past the extent header page,
            // which we can never deallocate
            for (uint j = 1; j < nPagesPerExtent; j++) {
                VersionedPageEntry const &pageEntry =
                    extentNode.getPageEntry(j);
                if (pageEntry.ownerId == UNALLOCATED_PAGE_OWNER_ID) {
                    continue;
                }

                // Map the pageEntry to its pageId
                PageId pageId = getLinearPageId(makePageNum(extentNum, j));

                // Skip over pages that have no snapshots, unless the page
                // is deallocation-deferred
                if (pageEntry.versionChainPageId == pageId &&
                    !isDeallocatedPageOwnerId(pageEntry.ownerId))
                {
                    continue;
                }

                // Only consider deallocation-deferred pages if there are no
                // longer any active txns that might be accessing the pages
                if ((!isDeallocatedPageOwnerId(pageEntry.ownerId) &&
                        pageEntry.allocationCsn < oldestActiveTxnId) ||
                    (isDeallocatedPageOwnerId(pageEntry.ownerId) &&
                        getDeallocatedTxnId(pageEntry.ownerId) <
                            oldestActiveTxnId))
                {
                    ++numOldPages;
                    oldPageSet.insert(oldPageSet.end(), pageId);
                }
            }
        }
        // Advance to the next SegmentAllocationNode; false signals the
        // caller that the whole segment has been scanned.
        ++iSegAlloc;
        if (segAllocNode.nextSegAllocPageId == NULL_PAGE_ID) {
            return false;
        }
    }

    return true;
}
00793 
00794 void VersionedRandomAllocationSegment::deallocateOldPages(
00795     PageSet const &oldPageSet,
00796     TxnId oldestActiveTxnId)
00797 {
00798     SXMutexExclusiveGuard deallocationGuard(deallocationMutex);
00799     SXMutexExclusiveGuard mapGuard(mapMutex);
00800 
00801     std::hash_set<PageId> deallocatedPageSet;
00802     for (PageSetConstIter pageIter = oldPageSet.begin();
00803         pageIter != oldPageSet.end(); pageIter++)
00804     {
00805         PageId pageId = *pageIter;
00806 
00807         // Skip over pages that have already been deallocated while walking
00808         // through the page chain of a previous page
00809         if (deallocatedPageSet.find(pageId) != deallocatedPageSet.end()) {
00810             continue;
00811         }
00812         // Skip over pages that may have been deallocated by another
00813         // thread calling deallocateOldPages().
00814         if (!isPageIdAllocateCommitted(pageId)) {
00815             deallocatedPageSet.insert(pageId);
00816             continue;
00817         }
00818 
00819         // Determine the txnId corresponding to the oldest page in the page
00820         // chain that can be deallocated.  If no pages can be deallocated,
00821         // then skip the page.  If the page is marked as deallocation-deferred,
00822         // deallocate the entire page chain.
00823         PageId anchorPageId;
00824         bool deallocateChain;
00825         TxnId deallocationCsn =
00826             getOldestTxnId(
00827                 pageId,
00828                 oldestActiveTxnId,
00829                 anchorPageId,
00830                 deallocatedPageSet,
00831                 deallocateChain);
00832         if (deallocateChain) {
00833             deallocateEntirePageChain(
00834                 pageId,
00835                 oldestActiveTxnId,
00836                 deallocatedPageSet);
00837             continue;
00838         }
00839         if (deallocationCsn == NULL_TXN_ID) {
00840             continue;
00841         }
00842 
00843         // Deallocate all pages following the anchor that are older than
00844         // deallocationCsn
00845         deallocatePageChain(anchorPageId, deallocationCsn, deallocatedPageSet);
00846     }
00847 }
00848 
TxnId VersionedRandomAllocationSegment::getOldestTxnId(
    PageId pageId,
    TxnId oldestActiveTxnId,
    PageId &anchorPageId,
    std::hash_set<PageId> &deallocatedPageSet,
    bool &deallocateChain)
{
    // Determines the cutoff csn for deallocating pages in the version chain
    // containing pageId.  Returns NULL_TXN_ID when nothing in the chain can
    // be deallocated (or the chain was already handled).  Sets anchorPageId
    // to the oldest page in the chain, and sets deallocateChain to true when
    // the whole chain is deallocation-deferred and ripe for removal.
    //
    // Loop through the page chain, looking for the anchor page, and the
    // second, newest old page.  The second, newest old page will be the newest
    // page that can be deallocated.  Note that we can't deallocate the newest
    // old page because it's still being referenced by active txns.

    PageId chainPageId = pageId;
    TxnId anchorCsn = NULL_TXN_ID;
    TxnId newestOldCsn = NULL_TXN_ID;
    TxnId nextNewestOldCsn = NULL_TXN_ID;
    anchorPageId = NULL_PAGE_ID;
    PageId newestOldPageId = NULL_PAGE_ID;
    PageId nextNewestOldPageId = NULL_PAGE_ID;

    deallocateChain = false;
    do {
        // An earlier walk over another chain member may already have handled
        // this page.
        if (deallocatedPageSet.find(chainPageId) != deallocatedPageSet.end()) {
            return NULL_TXN_ID;
        }
        assert(isPageIdAllocateCommitted(chainPageId));

        ExtentNum extentNum;
        BlockNum iPageInExtent;
        uint iSegAlloc;
        splitPageId(chainPageId, iSegAlloc, extentNum, iPageInExtent);
        assert(iPageInExtent);

        // Read the committed page entry from the extent allocation node.
        SegmentAccessor selfAccessor(getTracingSegment(), pCache);
        PageId extentPageId = getExtentAllocPageId(extentNum);
        VersionedExtentAllocLock extentAllocLock(selfAccessor);
        extentAllocLock.lockShared(extentPageId);
        VersionedExtentAllocationNode const &extentNode =
            extentAllocLock.getNodeForRead();
        VersionedPageEntry const &pageEntry =
            extentNode.getPageEntry(iPageInExtent);
        assert(pageEntry.ownerId != UNCOMMITTED_PAGE_OWNER_ID);

        // If the page is marked as deallocation-deferred, need to deallocate
        // the entire page chain, if the oldest active txn is newer than the
        // txn that marked the page.  Otherwise, wait until the active txns
        // referencing those pages have committed.
        if (isDeallocatedPageOwnerId(pageEntry.ownerId)) {
            if (getDeallocatedTxnId(pageEntry.ownerId) < oldestActiveTxnId) {
                deallocateChain = true;
            } else {
                skipDeferredDeallocations(pageId, deallocatedPageSet);
            }
            return NULL_TXN_ID;
        }

        // Track the oldest page seen so far; it becomes the anchor.
        if (anchorCsn == NULL_TXN_ID ||
            pageEntry.allocationCsn < anchorCsn)
        {
            anchorCsn = pageEntry.allocationCsn;
            anchorPageId = chainPageId;
        }
        // Track the two newest "old" pages, i.e., pages older than every
        // active txn.
        if (pageEntry.allocationCsn < oldestActiveTxnId) {
            if (newestOldCsn == NULL_TXN_ID ||
                pageEntry.allocationCsn > newestOldCsn)
            {
                if (newestOldCsn != NULL_TXN_ID) {
                    nextNewestOldCsn = newestOldCsn;
                    nextNewestOldPageId = newestOldPageId;
                }
                newestOldCsn = pageEntry.allocationCsn;
                newestOldPageId = chainPageId;
            } else {
                if (((nextNewestOldCsn == NULL_TXN_ID) ||
                        (pageEntry.allocationCsn > nextNewestOldCsn)) &&
                    (pageEntry.allocationCsn != newestOldCsn))
                {
                    // It's possible to have two page entries with the same csn
                    // if a page is truncated and then versioned within the
                    // same transaction.
                    nextNewestOldCsn = pageEntry.allocationCsn;
                    nextNewestOldPageId = chainPageId;
                }
            }
        }
        assert(pageEntry.versionChainPageId != NULL_PAGE_ID);
        chainPageId = pageEntry.versionChainPageId;
    } while (chainPageId != pageId);

    // At least one page in the chain has to be old
    assert(newestOldPageId != NULL_PAGE_ID);
    assert(anchorPageId != NULL_PAGE_ID);
    assert(nextNewestOldCsn == NULL_TXN_ID || nextNewestOldCsn < newestOldCsn);

    // If there is no next newest old page, then there's nothing to deallocate
    // in the page chain.  Add the pages we know are old to the
    // deallocatedPageSet so we can directly skip over them if we encounter
    // them again.
    if (nextNewestOldPageId == anchorPageId ||
        nextNewestOldPageId == NULL_PAGE_ID)
    {
        deallocatedPageSet.insert(anchorPageId);
        deallocatedPageSet.insert(pageId);
        deallocatedPageSet.insert(newestOldPageId);
        return NULL_TXN_ID;
    }

    // Set the deallocationCsn so only the next newest old page and any
    // pages older than it will be deallocated
    TxnId deallocationCsn = nextNewestOldCsn + 1;
    assert(deallocationCsn < oldestActiveTxnId);

    return deallocationCsn;
}
00963 
00964 void VersionedRandomAllocationSegment::deallocateEntirePageChain(
00965     PageId pageId,
00966     TxnId oldestActiveTxnId,
00967     std::hash_set<PageId> &deallocatedPageSet)
00968 {
00969     PageId chainPageId = pageId;
00970     VersionedPageEntry pageEntry;
00971     do {
00972         getCommittedPageEntryCopy(chainPageId, pageEntry);
00973 
00974         // All pages in the chain should be marked as deallocation-deferred
00975         // since we mark them atomically. They also must be old.
00976         assert(isDeallocatedPageOwnerId(pageEntry.ownerId));
00977         assert(getDeallocatedTxnId(pageEntry.ownerId) < oldestActiveTxnId);
00978         assert(pageEntry.allocationCsn < oldestActiveTxnId);
00979 
00980         deallocateSinglePage(chainPageId, deallocatedPageSet);
00981         chainPageId = pageEntry.versionChainPageId;
00982     } while (chainPageId != pageId);
00983 }
00984 
void VersionedRandomAllocationSegment::deallocateSinglePage(
    PageId pageId,
    std::hash_set<PageId> &deallocatedPageSet)
{
    // Deallocates one page in the permanent segment and mirrors the change
    // in the temporary (uncommitted) allocation node copies, if they exist.
    // The page is recorded in deallocatedPageSet so callers won't process it
    // again.  Caller must hold mapMutex exclusively.
    assert(mapMutex.isLocked(LOCKMODE_X));

    // We rely on superclass to discard page from cache as part of deallocation.
    RandomAllocationSegmentBase::deallocatePageRange(pageId, pageId);

    ExtentNum extentNum;
    BlockNum iPageInExtent;
    uint iSegAlloc;
    splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
    assert(iPageInExtent);

    // Reflect the changes in the temporary page entry, if it exists
    PageId extentPageId = getExtentAllocPageId(extentNum);
    NodeMapConstIter iter = allocationNodeMap.find(extentPageId);
    if (iter != allocationNodeMap.end()) {
        copyPageEntryToTemp(
            extentPageId,
            iter->second->tempPageId,
            iPageInExtent);
    }

    // Reflect the changes in the temporary extent entry, if it exists
    PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
    iter = allocationNodeMap.find(segAllocPageId);
    if (iter != allocationNodeMap.end()) {
        PageId tempSegAllocNodePageId = iter->second->tempPageId;
        SegmentAccessor segAccessor(pTempSegment, pCache);
        SegAllocLock tempSegAllocLock(segAccessor);
        tempSegAllocLock.lockExclusive(tempSegAllocNodePageId);
        SegmentAllocationNode &tempSegAllocNode =
            tempSegAllocLock.getNodeForWrite();
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;
        SegmentAllocationNode::ExtentEntry &tempExtentEntry =
            tempSegAllocNode.getExtentEntry(relativeExtentNum);
        // The permanent deallocation above freed one page, so the temp
        // copy's unallocated count must go up to match.
        tempExtentEntry.nUnallocatedPages++;
    }

    deallocatedPageSet.insert(pageId);
}
01028 
void VersionedRandomAllocationSegment::deallocatePageChain(
    PageId anchorPageId,
    TxnId deallocationCsn,
    std::hash_set<PageId> &deallocatedPageSet)
{
    // Deallocates every page in the version chain whose allocationCsn is
    // older than deallocationCsn, re-linking the surviving entries so the
    // chain stays intact.  The anchor itself is never deallocated.  Caller
    // must hold mapMutex exclusively (asserted in the callees).
    VersionedPageEntry prevPageEntry;
    getCommittedPageEntryCopy(anchorPageId, prevPageEntry);
    assert(
        prevPageEntry.ownerId != UNALLOCATED_PAGE_OWNER_ID &&
        !isDeallocatedPageOwnerId(prevPageEntry.ownerId));

    // See if the page is in the process of being marked
    // deallocation-deferred.  If it is, then don't deallocate any of the
    // pages in the page chain, even if they are old.  We'll wait until
    // the deallocation-deferral is actually committed before deallocating
    // them.
    if (uncommittedDeallocation(anchorPageId, deallocatedPageSet)) {
        return;
    }

    // needsUpdate tracks whether prevPageEntry's chain link was modified
    // and therefore still has to be copied into the temporary page entry.
    bool needsUpdate = false;
    PageId prevPageId = anchorPageId;
    PageId nextPageId = prevPageEntry.versionChainPageId;
    do {
        VersionedPageEntry pageEntry;
        getCommittedPageEntryCopy(nextPageId, pageEntry);

        if (pageEntry.allocationCsn < deallocationCsn) {
            // Deallocate the page entry and chain the previous page
            // entry to the page chained from the deallocated entry.
            // All of this is being done in the permanent page entry.
            // The temporary entry will be updated below.
            deallocateSinglePage(nextPageId, deallocatedPageSet);
            nextPageId = pageEntry.versionChainPageId;
            chainPageEntries(
                prevPageId,
                nextPageId,
                NULL_PAGE_ID,
                true);
            prevPageEntry.versionChainPageId = nextPageId;
            needsUpdate = true;

        } else {
            // Reflect the changes made in the previous page entry
            // in the temporary page entry, if it exists
            if (needsUpdate) {
                updateTempPageEntry(prevPageId, prevPageEntry);
            }
            needsUpdate = false;

            // Move the info for the current page entry into the previous
            prevPageId = nextPageId;
            prevPageEntry = pageEntry;
            nextPageId = pageEntry.versionChainPageId;
        }
    } while (nextPageId != anchorPageId);

    // Update the last previous entry if needed
    if (needsUpdate) {
        updateTempPageEntry(prevPageId, prevPageEntry);
    }
}
01091 
01092 bool VersionedRandomAllocationSegment::uncommittedDeallocation(
01093     PageId anchorPageId,
01094     std::hash_set<PageId> &deallocatedPageSet)
01095 {
01096     ExtentNum extentNum;
01097     BlockNum iPageInExtent;
01098     uint iSegAlloc;
01099     splitPageId(anchorPageId, iSegAlloc, extentNum, iPageInExtent);
01100     assert(iPageInExtent);
01101 
01102     // See if the page entry corresponding to the anchor is marked as
01103     // deallocation-deferred with a txnId of 0 in the temporary page entry.
01104     // If it is, then that means the txn doing the deallocation has not
01105     // committed yet.
01106 
01107     assert(mapMutex.isLocked(LOCKMODE_X));
01108     NodeMapConstIter iter =
01109         allocationNodeMap.find(getExtentAllocPageId(extentNum));
01110     if (iter == allocationNodeMap.end()) {
01111         return false;
01112     }
01113 
01114     PageId tempExtentPageId = iter->second->tempPageId;
01115     SegmentAccessor segAccessor(pTempSegment, pCache);
01116     VersionedExtentAllocLock tempExtAllocLock(segAccessor);
01117         tempExtAllocLock.lockShared(tempExtentPageId);
01118     VersionedExtentAllocationNode const &tempExtentNode =
01119         tempExtAllocLock.getNodeForRead();
01120     VersionedPageEntry const &tempPageEntry =
01121         tempExtentNode.getPageEntry(iPageInExtent);
01122     if (!isDeallocatedPageOwnerId(tempPageEntry.ownerId)) {
01123         return false;
01124     }
01125     if (getDeallocatedTxnId(tempPageEntry.ownerId) != TxnId(0)) {
01126         return false;
01127     }
01128 
01129     skipDeferredDeallocations(anchorPageId, deallocatedPageSet);
01130     return true;
01131 }
01132 
01133 void VersionedRandomAllocationSegment::skipDeferredDeallocations(
01134     PageId pageId,
01135     std::hash_set<PageId> &deallocatedPageSet)
01136 {
01137     // Add all the pages in the chain to the deallocated page set so we'll
01138     // skip over them.  All the other pages in the chain should also be
01139     // marked as deallocation-deferred.
01140     PageId chainPageId = pageId;
01141     VersionedPageEntry pageEntry;
01142     do {
01143         deallocatedPageSet.insert(chainPageId);
01144         getCommittedPageEntryCopy(chainPageId, pageEntry);
01145         assert(isDeallocatedPageOwnerId(pageEntry.ownerId));
01146         chainPageId = pageEntry.versionChainPageId;
01147     } while (chainPageId != pageId);
01148 }
01149 
01150 bool VersionedRandomAllocationSegment::validatePageChain(PageId anchorPageId)
01151 {
01152     // TODO zfong 25-Oct-2007: Check that the page chain is not circular,
01153     // except for the expected reference back to the anchor page.
01154 
01155     PageId chainPageId = anchorPageId;
01156     VersionedPageEntry pageEntry;
01157     do {
01158         getCommittedPageEntryCopy(chainPageId, pageEntry);
01159         chainPageId = pageEntry.versionChainPageId;
01160     } while (chainPageId != anchorPageId);
01161 
01162     return true;
01163 }
01164 
01165 void VersionedRandomAllocationSegment::updateTempPageEntry(
01166     PageId pageId,
01167     VersionedPageEntry const &pageEntry)
01168 {
01169     ExtentNum extentNum;
01170     BlockNum iPageInExtent;
01171     uint iSegAlloc;
01172     splitPageId(pageId, iSegAlloc, extentNum, iPageInExtent);
01173     assert(iPageInExtent);
01174 
01175     assert(mapMutex.isLocked(LOCKMODE_X));
01176     NodeMapConstIter iter =
01177         allocationNodeMap.find(getExtentAllocPageId(extentNum));
01178     if (iter != allocationNodeMap.end()) {
01179         PageId tempExtentPageId = iter->second->tempPageId;
01180         SegmentAccessor segAccessor(pTempSegment, pCache);
01181         VersionedExtentAllocLock tempExtAllocLock(segAccessor);
01182             tempExtAllocLock.lockExclusive(tempExtentPageId);
01183         VersionedExtentAllocationNode &tempExtentNode =
01184             tempExtAllocLock.getNodeForWrite();
01185         VersionedPageEntry &tempPageEntry =
01186             tempExtentNode.getPageEntry(iPageInExtent);
01187         tempPageEntry = pageEntry;
01188     }
01189 }
01190 
01191 void VersionedRandomAllocationSegment::freeTempPages()
01192 {
01193     SXMutexExclusiveGuard mapGuard(mapMutex);
01194 
01195     ModifiedAllocationNodeMap::iterator iter = allocationNodeMap.begin();
01196     while (iter != allocationNodeMap.end()) {
01197         // All entries in the map should correspond to segment allocation
01198         // nodes because we free the nodes corresponding to extent allocation
01199         // nodes once their update counts reach 0.
01200         SharedModifiedAllocationNode pModAllocNode = iter->second;
01201         assert(!pModAllocNode->updateCount && pModAllocNode->isSegAllocNode);
01202         PageId pageId = iter->first;
01203         iter++;
01204         freeTempPage(pageId, pModAllocNode->tempPageId);
01205     }
01206 }
01207 
SXMutex &VersionedRandomAllocationSegment::getDeallocationMutex()
{
    // Exposes the mutex that deallocateOldPages acquires exclusively, so
    // callers can coordinate with in-progress deallocations.
    return deallocationMutex;
}
01212 
BlockNum VersionedRandomAllocationSegment::backupAllocationNodes(
    SharedSegPageBackupRestoreDevice pBackupDevice,
    bool countDataPages,
    TxnId lowerBoundCsn,
    TxnId upperBoundCsn,
    bool volatile const &abortFlag)
{
    // Writes every segment allocation node and extent allocation node to the
    // backup device, in page order.  When countDataPages is true, also
    // counts allocated data pages whose allocationCsn lies in the range
    // (lowerBoundCsn, upperBoundCsn]; a NULL_TXN_ID lowerBoundCsn means no
    // lower bound.  Returns the number of such pages (0 when not counting).
    // Throws AbortExcn via checkAbort if abortFlag is set during the walk.
    assert(upperBoundCsn != NULL_TXN_ID);
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    SegAllocLock segAllocLock(selfAccessor);
    uint iSegAlloc = 0;
    ExtentNum extentNum = 0;
    BlockNum nDataPages = 0;

    while (true) {
        PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
        segAllocLock.lockShared(segAllocPageId);
        // Back up the SegAllocNode page itself.
        pBackupDevice->writeBackupPage(
            segAllocLock.getPage().getReadableData());

        SegmentAllocationNode const &segAllocNode =
            segAllocLock.getNodeForRead();
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;

        for (uint i = relativeExtentNum; i < segAllocNode.nExtents;
            i++, extentNum++)
        {
            checkAbort(abortFlag);
            SegmentAllocationNode::ExtentEntry const &extentEntry =
                segAllocNode.getExtentEntry(i);

            // Back up the extent allocation node page.
            VersionedExtentAllocLock extentAllocLock(selfAccessor);
            extentAllocLock.lockShared(getExtentAllocPageId(extentNum));
            pBackupDevice->writeBackupPage(
                extentAllocLock.getPage().getReadableData());

            if (countDataPages) {
                // Don't bother looping through the entries if we know none
                // are allocated
                if (extentEntry.nUnallocatedPages == nPagesPerExtent - 1) {
                    continue;
                }

                VersionedExtentAllocationNode const &extentNode =
                    extentAllocLock.getNodeForRead();

                // Start at pageEntry 1 to skip past the extent header page
                for (uint j = 1; j < nPagesPerExtent; j++) {
                    checkAbort(abortFlag);
                    VersionedPageEntry const &pageEntry =
                        extentNode.getPageEntry(j);
                    // Count only allocated pages inside the csn bounds.
                    if (pageEntry.ownerId != UNALLOCATED_PAGE_OWNER_ID &&
                       (lowerBoundCsn == NULL_TXN_ID ||
                           pageEntry.allocationCsn > lowerBoundCsn) &&
                       (pageEntry.allocationCsn <= upperBoundCsn))
                    {
                        nDataPages++;
                    }
                }
            }
        }

        ++iSegAlloc;
        if (segAllocNode.nextSegAllocPageId == NULL_PAGE_ID) {
            break;
        }
    }

    return nDataPages;
}
01283 
01284 void VersionedRandomAllocationSegment::backupDataPages(
01285     SharedSegPageBackupRestoreDevice pBackupDevice,
01286     TxnId lowerBoundCsn,
01287     TxnId upperBoundCsn,
01288     bool volatile const &abortFlag)
01289 {
01290     locateDataPages(
01291         pBackupDevice,
01292         lowerBoundCsn,
01293         upperBoundCsn,
01294         true,
01295         abortFlag);
01296 }
01297 
void VersionedRandomAllocationSegment::locateDataPages(
    SharedSegPageBackupRestoreDevice pBackupDevice,
    TxnId lowerBoundCsn,
    TxnId upperBoundCsn,
    bool isBackup,
    bool volatile const &abortFlag)
{
    // Walks all allocated data pages whose allocationCsn lies in the range
    // (lowerBoundCsn, upperBoundCsn] -- a NULL_TXN_ID lowerBoundCsn means no
    // lower bound -- and either backs up (isBackup true) or restores each
    // one through pBackupDevice.  Throws AbortExcn via checkAbort if
    // abortFlag is set during the walk.
    assert(upperBoundCsn != NULL_TXN_ID);
    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    SegAllocLock segAllocLock(selfAccessor);
    uint iSegAlloc = 0;
    ExtentNum extentNum = 0;
    PBuffer segNodeBuffer = NULL;
    PBuffer extentNodeBuffer = NULL;
    if (isBackup) {
        segNodeBuffer = pBackupDevice->getReservedBufferPage();
        extentNodeBuffer = pBackupDevice->getReservedBufferPage();
    }

    while (true) {
        PageId segAllocPageId = getSegAllocPageId(iSegAlloc);
        segAllocLock.lockShared(segAllocPageId);
        // In the case of a backup, make a copy of the allocation nodes so
        // we don't pin them while we're doing I/O on the data pages mapped
        // by the extent entries in those nodes.  Keeping the nodes pinned
        // prevents new pages from being allocated from those nodes.
        if (isBackup) {
            memcpy(
                segNodeBuffer,
                segAllocLock.getPage().getReadableData(),
                getFullPageSize());
            segAllocLock.unlock();
        }
        SegmentAllocationNode const &segAllocNode =
            (isBackup) ?
                *reinterpret_cast<SegmentAllocationNode const *>
                    (segNodeBuffer) :
                segAllocLock.getNodeForRead();
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;

        for (uint i = relativeExtentNum; i < segAllocNode.nExtents;
            i++, extentNum++)
        {
            checkAbort(abortFlag);

            SegmentAllocationNode::ExtentEntry const &extentEntry =
                segAllocNode.getExtentEntry(i);
            // Skip extents with no allocated data pages; the -1 accounts for
            // the always-allocated extent header page.
            if (extentEntry.nUnallocatedPages == nPagesPerExtent - 1) {
                continue;
            }

            VersionedExtentAllocLock extentAllocLock(selfAccessor);
            extentAllocLock.lockShared(getExtentAllocPageId(extentNum));
            // Same copy-then-unlock trick as above, per extent node.
            if (isBackup) {
                memcpy(
                    extentNodeBuffer,
                    extentAllocLock.getPage().getReadableData(),
                    getFullPageSize());
                extentAllocLock.unlock();
            }
            VersionedExtentAllocationNode const &extentNode =
                (isBackup) ?
                    *reinterpret_cast<VersionedExtentAllocationNode const *>
                        (extentNodeBuffer) :
                    extentAllocLock.getNodeForRead();

            // Start at pageEntry 1 to skip past the extent header page
            for (uint j = 1; j < nPagesPerExtent; j++) {
                checkAbort(abortFlag);
                VersionedPageEntry const &pageEntry =
                    extentNode.getPageEntry(j);
                // Ignore pages outside the csn boundaries
                if (pageEntry.ownerId == UNALLOCATED_PAGE_OWNER_ID ||
                   (lowerBoundCsn != NULL_TXN_ID &&
                       pageEntry.allocationCsn <= lowerBoundCsn) ||
                   (pageEntry.allocationCsn > upperBoundCsn))
                {
                    continue;
                }

                // Map the pageEntry to its pageId, and then either back up
                // or restore it.
                PageId pageId = getLinearPageId(makePageNum(extentNum, j));
                BlockId blockId = translatePageId(pageId);
                if (isBackup) {
                    pBackupDevice->backupPage(blockId);
                } else {
                    pBackupDevice->restorePage(blockId);
                }
            }
        }
        ++iSegAlloc;
        if (segAllocNode.nextSegAllocPageId == NULL_PAGE_ID) {
            break;
        }
    }

    // Wait for all pending writes to complete
    pBackupDevice->waitForPendingWrites();
}
01398 
void VersionedRandomAllocationSegment::restoreFromBackup(
    SharedSegPageBackupRestoreDevice pBackupDevice,
    TxnId lowerBoundCsn,
    TxnId upperBoundCsn,
    bool volatile const &abortFlag)
{
    // Restores the segment from a backup: first the allocation node pages
    // (in the same order backupAllocationNodes wrote them), then the data
    // pages whose csns lie within (lowerBoundCsn, upperBoundCsn].  Throws
    // FennelExcn if the underlying segment runs out of space, AbortExcn if
    // abortFlag is set.
    //
    // First restore the allocation node pages.
    //
    // The assumption is that prior to calling this method, all pages in the
    // cache have been unmapped, so we're ensured that when we're reading
    // pages from cache, we won't read stale copies.

    SegmentAccessor selfAccessor(getTracingSegment(), pCache);
    SegAllocLock segAllocLock(selfAccessor);
    uint iSegAlloc = 0;
    ExtentNum extentNum = 0;

    while (true) {
        // Restore the allocation node page from the backup file, writing it
        // to disk.  Then wait for the write to complete before reading it
        // into cache, so we're ensured that we pick up the completed write.
        // Also make sure there's enough space for the first extent in this
        // SegAllocNode.
        PageId segAllocPageId = getSegAllocPageId(iSegAlloc);

        if (!DelegatingSegment::ensureAllocatedSize(
            makePageNum(extentNum, nPagesPerExtent)))
        {
            throw FennelExcn(
                FennelResource::instance().outOfSpaceDuringRestore());
        }
        pBackupDevice->restorePage(translatePageId(segAllocPageId));
        pBackupDevice->waitForPendingWrites();
        segAllocLock.lockShared(segAllocPageId);

        SegmentAllocationNode const &segAllocNode =
            segAllocLock.getNodeForRead();
        ExtentNum relativeExtentNum = extentNum % nExtentsPerSegAlloc;

        // Restore each extent allocation node mapped by this SegAllocNode.
        for (uint i = relativeExtentNum; i < segAllocNode.nExtents;
            i++, extentNum++)
        {
            checkAbort(abortFlag);
            SegmentAllocationNode::ExtentEntry const &extentEntry =
                segAllocNode.getExtentEntry(i);
            // Make sure there's enough space in the segment for this extent
            if (!DelegatingSegment::ensureAllocatedSize(
                makePageNum(extentNum, nPagesPerExtent)))
            {
                throw FennelExcn(
                    FennelResource::instance().outOfSpaceDuringRestore());
            }
            pBackupDevice->restorePage(
                translatePageId(getExtentAllocPageId(extentNum)));
        }
        ++iSegAlloc;
        if (segAllocNode.nextSegAllocPageId == NULL_PAGE_ID) {
            break;
        }
    }

    // Walk through the allocation node pages just restored, looking for
    // the page entries within the lower and upper bounds, and restore them.
    // But first make sure to wait for the writes of the remaining extent
    // allocation node pages to complete.
    pBackupDevice->waitForPendingWrites();
    locateDataPages(
        pBackupDevice,
        lowerBoundCsn,
        upperBoundCsn,
        false,
        abortFlag);
}
01472 
01473 void VersionedRandomAllocationSegment::checkAbort(
01474     bool volatile const &abortFlag)
01475 {
01476     if (abortFlag) {
01477         throw AbortExcn();
01478     }
01479 }
01480 
01481 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/segment/VersionedRandomAllocationSegment.cpp#19 $");
01482 
01483 // End VersionedRandomAllocationSegment.cpp

Generated on Mon Jun 22 04:00:20 2009 for Fennel by  doxygen 1.5.1