Database.cpp

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/db/Database.cpp#43 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 The Eigenbase Project
00005 // Copyright (C) 2003-2009 SQLstream, Inc.
00006 // Copyright (C) 2005-2009 LucidEra, Inc.
00007 // Portions Copyright (C) 1999-2009 John V. Sichi
00008 //
00009 // This program is free software; you can redistribute it and/or modify it
00010 // under the terms of the GNU General Public License as published by the Free
00011 // Software Foundation; either version 2 of the License, or (at your option)
00012 // any later version approved by The Eigenbase Project.
00013 //
00014 // This program is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 */
00023 
00024 #include "fennel/common/CommonPreamble.h"
00025 #include "fennel/common/AbortExcn.h"
00026 #include "fennel/db/Database.h"
00027 #include "fennel/db/CheckpointThread.h"
00028 #include "fennel/db/DataFormatExcn.h"
00029 #include "fennel/common/ConfigMap.h"
00030 #include "fennel/common/FileSystem.h"
00031 #include "fennel/common/FennelResource.h"
00032 #include "fennel/device/RandomAccessFileDevice.h"
00033 #include "fennel/cache/Cache.h"
00034 #include "fennel/cache/PagePredicate.h"
00035 #include "fennel/segment/SegmentFactory.h"
00036 #include "fennel/segment/LinearDeviceSegment.h"
00037 #include "fennel/segment/Segment.h"
00038 #include "fennel/segment/VersionedSegment.h"
00039 #include "fennel/segment/VersionedRandomAllocationSegment.h"
00040 #include "fennel/common/CompoundId.h"
00041 #include "fennel/txn/LogicalTxnLog.h"
00042 #include "fennel/txn/LogicalRecoveryLog.h"
00043 #include "fennel/common/StatsTarget.h"
00044 #include "fennel/common/FennelResource.h"
00045 
00046 #include <boost/filesystem/operations.hpp>
00047 
00048 #ifdef __MSVC__
00049 #include <process.h>
00050 #endif
00051 
00052 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/db/Database.cpp#43 $");
00053 
00054 using namespace boost::filesystem;
00055 
// Names of the configuration parameters looked up in the ConfigMap.
ParamName Database::paramDatabaseDir = "databaseDir";
ParamName Database::paramResourceDir = "resourceDir";
ParamName Database::paramForceTxns = "forceTxns";
ParamName Database::paramDisableSnapshots = "disableSnapshots";
// Per-device name prefixes; readDeviceParams() concatenates a prefix with
// one of the size suffixes below to form the full parameter name (for
// example "database" + "InitSize").
ParamName Database::paramDatabasePrefix = "database";
ParamName Database::paramTempPrefix = "temp";
ParamName Database::paramShadowLogPrefix = "databaseShadowLog";
ParamName Database::paramTxnLogPrefix = "databaseTxnLog";
// Size suffixes appended to the device prefixes above.
ParamName Database::paramInitSizeSuffix = "InitSize";
ParamName Database::paramMaxSizeSuffix = "MaxSize";
ParamName Database::paramIncSizeSuffix = "IncrementSize";

// Parameter values naming the log allocation modes.
// NOTE(review): not referenced in the visible part of this file.
ParamVal Database::valLogAllocLinear = "linear";
ParamVal Database::valLogAllocCircular = "circular";

// Segment IDs recognized by getSegmentById().
const SegmentId Database::DEFAULT_DATA_SEGMENT_ID = SegmentId(1);
const SegmentId Database::TEMP_SEGMENT_ID = SegmentId(2);
00073 
00074 // NOTE:  correct sequence is critical in most Database operations
00075 
00076 SharedDatabase Database::newDatabase(
00077     SharedCache pCacheInit,
00078     ConfigMap const &configMapInit,
00079     DeviceMode openModeInit,
00080     SharedTraceTarget pTraceTarget,
00081     SharedPseudoUuidGenerator pUuidGenerator)
00082 {
00083     if (!pUuidGenerator) {
00084         pUuidGenerator.reset(new PseudoUuidGenerator());
00085     }
00086     SharedDatabase pDb =
00087         SharedDatabase(
00088             new Database(
00089                 pCacheInit, configMapInit, openModeInit, pTraceTarget,
00090                 pUuidGenerator),
00091             ClosableObjectDestructor());
00092     pDb->init();
00093     return pDb;
00094 }
00095 
00096 Database::Database(
00097     SharedCache pCacheInit,
00098     ConfigMap const &configMapInit,
00099     DeviceMode openModeInit,
00100     SharedTraceTarget pTraceTarget,
00101     SharedPseudoUuidGenerator pUuidGeneratorInit)
00102     : TraceSource(pTraceTarget,"database"),
00103       pCache(pCacheInit),
00104       configMap(configMapInit),
00105       pUuidGenerator(pUuidGeneratorInit)
00106 {
00107     openMode = openModeInit;
00108     disableDeallocateOld = false;
00109 }
00110 
00111 void Database::init()
00112 {
00113     forceTxns = configMap.getBoolParam(paramForceTxns);
00114     disableSnapshots = configMap.getBoolParam(paramDisableSnapshots);
00115 
00116     // NOTE:  do this early in case other initialization throws exceptions
00117     // (and to prevent thread-safety issues later on)
00118     std::string resourceDir = configMap.getStringParam(paramResourceDir);
00119     if (resourceDir == "") {
00120         // If we weren't told explicitly where to find resources, make sure
00121         // we can rely on an environment variable setting.  TODO:
00122         // instead of pre-checking this, get information back from
00123         // FennelResource to tell us whether it was successfully initialized.
00124         assert(getenv("FENNEL_HOME"));
00125     } else {
00126         FennelResource::setResourceFileLocation(resourceDir);
00127     }
00128     FennelResource::instance();
00129 
00130     dataDeviceId = DeviceId(1);
00131     shadowDeviceId = DeviceId(2);
00132     txnLogDeviceId = DeviceId(3);
00133     tempDeviceId = DeviceId(4);
00134 
00135     headerPageId1 = Segment::getLinearPageId(0);
00136     headerPageId2 = Segment::getLinearPageId(1);
00137 
00138     header.versionNumber = SegVersionNum(0);
00139     recoveryRequired = false;
00140 
00141     // REVIEW:  Have to do this so that later assignments from header to stored
00142     // data work correctly.  But it breaks encapsulation.  Find a better way.
00143     header.magicNumber = DatabaseHeader::MAGIC_NUMBER;
00144 
00145     // TODO:  use boost filesystem library for platform-independent path
00146     // manipulation
00147     std::string databaseDir = configMap.getStringParam(paramDatabaseDir);
00148     // TODO:  real excn
00149     assert(databaseDir != "");
00150 
00151     dataDeviceName = databaseDir + "/db.dat";
00152     shadowDeviceName = databaseDir + "/shadowlog.dat";
00153     txnLogDeviceName = databaseDir + "/txnlog.dat";
00154     tempDeviceName = databaseDir + "/temp.dat";
00155 
00156     nCheckpoints = nCheckpointsStat = 0;
00157 
00158     pSegmentFactory = SegmentFactory::newSegmentFactory(
00159         configMap,getSharedTraceTarget());
00160 
00161     if (!openMode.create) {
00162         // TODO:  real excn
00163         assert(FileSystem::doesFileExist(dataDeviceName.c_str()));
00164         if (FileSystem::doesFileExist(shadowDeviceName.c_str())) {
00165             prepareForRecovery();
00166             return;
00167         }
00168     }
00169 
00170     openSegments();
00171 }
00172 
// Puts the database into the recovery-required state and opens just the
// pieces needed for recovery:  the temp segment, the data device, the
// header (loaded in recovery mode and immediately rewritten), the existing
// shadow log, and the data segment.  Called from init() when a shadow log
// file is found for an existing database.
void Database::prepareForRecovery()
{
    FENNEL_TRACE(TRACE_WARNING, "recovery required");
    recoveryRequired = true;
    createTempSegment();
    LinearDeviceSegmentParams dataDeviceParams;
    createDataDevice(dataDeviceParams);
    // recovery mode:  pick between the two header copies, then write the
    // chosen header back to both pages
    loadHeader(true);
    writeHeader();
    // open the shadow log with the database's own open mode (unlike
    // openSegments, which forces create on)
    SharedSegment pShadowLogSegment = createShadowLog(openMode);
    createDataSegment(pShadowLogSegment,dataDeviceParams);
}
00185 
// Opens the database for normal operation:  creates the checkpoint thread,
// temp segment, transaction log, data device, header (allocated for a new
// database, loaded for an existing one), shadow log and data segment, then
// checkpoints an existing database and starts the checkpoint thread.
void Database::openSegments()
{
#ifdef NDEBUG
    FENNEL_TRACE(TRACE_INFO, "Fennel build:  --with-optimization");
#else
    FENNEL_TRACE(TRACE_INFO, "Fennel build:  --without-optimization");
#endif
    FENNEL_TRACE(TRACE_INFO, "opening database; process ID = " << getpid());

    // created here but not started until the end of this method
    pCheckpointThread = SharedCheckpointThread(
        new CheckpointThread(*this),
        ClosableObjectDestructor());

    createTempSegment();

    // a fresh online UUID is generated on every open
    pUuidGenerator->generateUuid(header.onlineUuid);
    FENNEL_TRACE(TRACE_INFO, "online UUID = " << header.onlineUuid);

    // the txn log is always opened with the create and direct flags set
    DeviceMode txnLogMode = openMode;
    txnLogMode.create = true;
    txnLogMode.direct = true;
    createTxnLog(txnLogMode);

    LinearDeviceSegmentParams dataDeviceParams;
    createDataDevice(dataDeviceParams);

    if (openMode.create) {
        // online UUID will be written out by allocateHeader
        allocateHeader();
    } else {
        // loadHeader overwrites the in-memory header, so save the UUID
        // generated above and restore it afterwards
        PseudoUuid newOnlineUuid = header.onlineUuid;
        loadHeader(false);
        // overwrite old online UUID; new one will be written out
        // by checkpoint below
        header.onlineUuid = newOnlineUuid;
    }

    // the shadow log is likewise always opened with create and direct set
    DeviceMode shadowMode = openMode;
    shadowMode.create = true;
    shadowMode.direct = true;

    SharedSegment pShadowLogSegment = createShadowLog(shadowMode);
    createDataSegment(pShadowLogSegment, dataDeviceParams);

    FENNEL_TRACE(
        TRACE_INFO,
        "database opened; page version = "
        << header.versionNumber);

    if (!openMode.create) {
        checkpointImpl();
    }

    pCheckpointThread->start();
}
00241 
// Destructor:  intentionally empty in this file; shutdown work is done in
// closeImpl().
Database::~Database()
{
}
00245 
// Shuts the database down:  closes the checkpoint thread, frees leftover
// versioning temp pages, then closes the devices.  When no recovery is
// pending, a final checkpoint is taken (if the txn log and data segment
// exist) and the log files are deleted; when recovery is pending, the logs
// are left in place.
void Database::closeImpl()
{
    FENNEL_TRACE(
        TRACE_INFO,
        "closing database");
    // may be unset if startup did not get as far as openSegments()
    if (pCheckpointThread) {
        pCheckpointThread->close();
    }

    // Free any leftover temp pages used for page versioning
    if (pDataSegment && areSnapshotsEnabled()) {
        VersionedRandomAllocationSegment *pVersionedRandomSegment =
            SegmentFactory::dynamicCast<VersionedRandomAllocationSegment *>(
                pDataSegment);
        pVersionedRandomSegment->freeTempPages();
    }

    // Verify that no garbage temp pages remain allocated.
    if (pTempSegment) {
        assert(pTempSegment->getAllocatedSizeInPages() == 0);
    }

    if (isRecoveryRequired()) {
        // leave the log files on disk; only release the devices
        closeDevices();
    } else {
        // NOTE jvs 14-Nov-2006:  In case we're auto-closing after
        // a failed startup, skip checkpoint if we don't have
        // everything we need for it.
        if (pTxnLog && pDataSegment) {
            checkpointImpl();
        }
        closeDevices();
        deleteLogs();
    }
    FENNEL_TRACE(
        TRACE_INFO,
        "database closed; page version = "
        << header.versionNumber);
}
00285 
00286 void Database::closeDevices()
00287 {
00288     pVersionedSegment = NULL;
00289     pTxnLog.reset();
00290     // REVIEW: have to explicitly close in case someone (like a recovery
00291     // factory) else still has a segment reference; should probably find a
00292     // better way to deal with this
00293     if (pDataSegment) {
00294         pDataSegment->close();
00295         pDataSegment.reset();
00296     }
00297     pHeaderSegment.reset();
00298     if (pTempSegment) {
00299         pTempSegment->close();
00300         pTempSegment.reset();
00301     }
00302 
00303     // for incomplete recovery or startup, these devices may not have been
00304     // opened yet
00305     if (pCache) {
00306         if (pCache->getDevice(txnLogDeviceId)) {
00307             pCache->unregisterDevice(txnLogDeviceId);
00308         }
00309         if (pCache->getDevice(shadowDeviceId)) {
00310             pCache->unregisterDevice(shadowDeviceId);
00311         }
00312         if (pCache->getDevice(dataDeviceId)) {
00313             pCache->unregisterDevice(dataDeviceId);
00314         }
00315         if (pCache->getDevice(tempDeviceId)) {
00316             pCache->unregisterDevice(tempDeviceId);
00317         }
00318     }
00319 }
00320 
00321 void Database::deleteLogs()
00322 {
00323     FileSystem::remove(shadowDeviceName.c_str());
00324     FileSystem::remove(txnLogDeviceName.c_str());
00325 
00326     // TODO jvs 25-June-2005:  here and in LogicalRecoveryLog, we should
00327     // be using ConfigMap to determine where to store txn logs
00328     // instead of current_path()
00329     directory_iterator end_itr;
00330     for (directory_iterator itr(current_path()); itr != end_itr; ++itr) {
00331         std::string filename = itr->path().filename();
00332         // TODO jvs 25-June-2005:  encapsulate filename parsing in
00333         // LogicalRecoveryLog
00334         if (filename.length() < 4) {
00335             continue;
00336         }
00337         if (filename.substr(0, 3) != "txn") {
00338             continue;
00339         }
00340         if (filename.substr(filename.length() - 4, 4) != ".dat") {
00341             continue;
00342         }
00343         FileSystem::remove(filename.c_str());
00344     }
00345 }
00346 
00347 SharedSegment Database::createTxnLogSegment(
00348     DeviceMode txnLogMode,PageId oldestPageId)
00349 {
00350     SharedRandomAccessDevice pTxnLogDevice(
00351         new RandomAccessFileDevice(txnLogDeviceName,txnLogMode));
00352     pCache->registerDevice(txnLogDeviceId,pTxnLogDevice);
00353 
00354     LinearDeviceSegmentParams deviceParams;
00355     readDeviceParams(paramTxnLogPrefix,txnLogMode,deviceParams);
00356     CompoundId::setDeviceId(deviceParams.firstBlockId,txnLogDeviceId);
00357     CompoundId::setBlockNum(deviceParams.firstBlockId,0);
00358     if (forceTxns) {
00359         deviceParams.nPagesAllocated = 0;
00360     } else {
00361         deviceParams.nPagesAllocated = MAXU;
00362         deviceParams.nPagesIncrement = 0;
00363         deviceParams.nPagesMax = deviceParams.nPagesMin;
00364     }
00365 
00366     SharedSegment pLinearSegment =
00367         pSegmentFactory->newLinearDeviceSegment(
00368             pCache,
00369             deviceParams);
00370 
00371     SharedSegment pTxnLogSegment;
00372     if (forceTxns) {
00373         pTxnLogSegment = pLinearSegment;
00374     } else {
00375         pTxnLogSegment = pSegmentFactory->newCircularSegment(
00376             pLinearSegment,pCheckpointThread,oldestPageId);
00377     }
00378 
00379     return pTxnLogSegment;
00380 }
00381 
00382 void Database::createTxnLog(DeviceMode txnLogMode)
00383 {
00384     SharedSegment pTxnLogSegment = createTxnLogSegment(
00385         txnLogMode,NULL_PAGE_ID);
00386     SegmentAccessor segmentAccessor(pTxnLogSegment,pCache);
00387     pTxnLog = LogicalTxnLog::newLogicalTxnLog(
00388         segmentAccessor,
00389         header.onlineUuid,
00390         pSegmentFactory);
00391     pTxnLog->checkpoint(header.txnLogCheckpointMemento);
00392 }
00393 
00394 SharedSegment Database::createShadowLog(DeviceMode shadowLogMode)
00395 {
00396     SharedRandomAccessDevice pShadowDevice(
00397         new RandomAccessFileDevice(shadowDeviceName,shadowLogMode));
00398     pCache->registerDevice(shadowDeviceId,pShadowDevice);
00399 
00400     LinearDeviceSegmentParams deviceParams;
00401     readDeviceParams(paramShadowLogPrefix,shadowLogMode,deviceParams);
00402     CompoundId::setDeviceId(deviceParams.firstBlockId,shadowDeviceId);
00403     CompoundId::setBlockNum(deviceParams.firstBlockId,0);
00404 
00405     if (forceTxns) {
00406         if (shadowLogMode.create) {
00407             // start allocating from beginning of device
00408             deviceParams.nPagesAllocated = 0;
00409         } else {
00410             // treat entire device as pre-allocated because we're
00411             // going to scan it during recovery
00412             deviceParams.nPagesAllocated = MAXU;
00413         }
00414     } else {
00415         deviceParams.nPagesAllocated = MAXU;
00416         deviceParams.nPagesIncrement = 0;
00417         deviceParams.nPagesMax = deviceParams.nPagesMin;
00418     }
00419 
00420     SharedSegment pShadowSegment =
00421         pSegmentFactory->newLinearDeviceSegment(
00422             pCache,
00423             deviceParams);
00424 
00425     PageId oldestPageId = NULL_PAGE_ID;
00426     if (!shadowLogMode.create) {
00427         oldestPageId = header.shadowRecoveryPageId;
00428     }
00429 
00430     if (!forceTxns) {
00431         pShadowSegment = pSegmentFactory->newCircularSegment(
00432             pShadowSegment,
00433             pCheckpointThread,
00434             oldestPageId);
00435     }
00436 
00437     return pSegmentFactory->newWALSegment(pShadowSegment);
00438 }
00439 
00440 void Database::createDataDevice(LinearDeviceSegmentParams &deviceParams)
00441 {
00442     readDeviceParams(paramDatabasePrefix,openMode,deviceParams);
00443 
00444     FileSize initialSize = FileSize(0);
00445     if (shouldForceTxns()) {
00446         // include +2 for the database header pages
00447         initialSize = (deviceParams.nPagesMin + 2) * pCache->getPageSize();
00448     }
00449 
00450     pDataDevice =
00451         SharedRandomAccessDevice(
00452             new RandomAccessFileDevice(
00453                 dataDeviceName,
00454                 openMode,
00455                 initialSize));
00456     pCache->registerDevice(dataDeviceId,pDataDevice);
00457 }
00458 
00459 void Database::createDataSegment(
00460     SharedSegment pShadowLogSegment,
00461     LinearDeviceSegmentParams &deviceParams)
00462 {
00463     // first data BlockId is located after the two database header pages
00464     CompoundId::setDeviceId(deviceParams.firstBlockId,dataDeviceId);
00465     CompoundId::setBlockNum(deviceParams.firstBlockId,2);
00466 
00467     deviceParams.nPagesAllocated = MAXU;
00468 
00469     SharedSegment pDataDeviceSegment =
00470         pSegmentFactory->newLinearDeviceSegment(
00471             pCache,
00472             deviceParams);
00473 
00474     SharedSegment pVersionedDataSegment =
00475         pSegmentFactory->newVersionedSegment(
00476             pDataDeviceSegment,
00477             pShadowLogSegment,
00478             header.onlineUuid,
00479             header.versionNumber + (recoveryRequired ? 0 : 1));
00480 
00481     pVersionedSegment = SegmentFactory::dynamicCast<VersionedSegment *>(
00482         pVersionedDataSegment);
00483 
00484     // If recovery is required, defer initializing the data segment for use
00485     // until after recovery has completed.
00486     if (areSnapshotsEnabled()) {
00487         pDataSegment =
00488             pSegmentFactory->newVersionedRandomAllocationSegment(
00489                 pVersionedDataSegment,
00490                 pTempSegment,
00491                 openMode.create,
00492                 recoveryRequired);
00493     } else {
00494         pDataSegment =
00495             pSegmentFactory->newRandomAllocationSegment(
00496                 pVersionedDataSegment,
00497                 openMode.create,
00498                 recoveryRequired);
00499     }
00500 }
00501 
00502 void Database::createTempSegment()
00503 {
00504     // If the temp device file already exists, use it; otherwise, create it.
00505     // There's no point in recreating it on every startup.  But REVIEW:  sizing
00506     // issues.
00507     DeviceMode tempMode = openMode;
00508     tempMode.create = !FileSystem::doesFileExist(tempDeviceName.c_str());
00509 
00510     LinearDeviceSegmentParams deviceParams;
00511     readDeviceParams(paramTempPrefix,tempMode,deviceParams);
00512     FileSize initialSize = FileSize(0);
00513     if (shouldForceTxns()) {
00514         initialSize = deviceParams.nPagesMin * pCache->getPageSize();
00515     }
00516 
00517     SharedRandomAccessDevice pTempDevice(
00518         new RandomAccessFileDevice(tempDeviceName,tempMode,initialSize));
00519     pCache->registerDevice(tempDeviceId,pTempDevice);
00520 
00521     // This forces the full device size to be used.
00522     tempMode.create = false;
00523 
00524     // no header for temp device
00525     CompoundId::setDeviceId(deviceParams.firstBlockId,tempDeviceId);
00526     CompoundId::setBlockNum(deviceParams.firstBlockId,0);
00527 
00528     SharedSegment pTempDeviceSegment =
00529         pSegmentFactory->newLinearDeviceSegment(
00530             pCache,
00531             deviceParams);
00532 
00533     // Reformat any existing temp data.
00534     pTempSegment =
00535         pSegmentFactory->newRandomAllocationSegment(
00536             pTempDeviceSegment,
00537             true);
00538 }
00539 
// Returns a reference to the configuration map held by this database.
const ConfigMap& Database::getConfigMap() const
{
    return configMap;
}
00544 
// Returns the cache this database was constructed with.
SharedCache Database::getCache() const
{
    return pCache;
}
00549 
// Returns the data segment; unset until createDataSegment() has run and
// after closeDevices().
SharedSegment Database::getDataSegment() const
{
    return pDataSegment;
}
00554 
// Returns the temp segment; unset until createTempSegment() has run and
// after closeDevices().
SharedSegment Database::getTempSegment() const
{
    return pTempSegment;
}
00559 
// Returns the checkpoint thread; it is created in openSegments(), so it is
// unset before that point.
SharedCheckpointThread Database::getCheckpointThread() const
{
    return pCheckpointThread;
}
00564 
00565 SharedSegment Database::getSegmentById(
00566     SegmentId segmentId,
00567     SharedSegment pDataSegment)
00568 {
00569     if (segmentId == TEMP_SEGMENT_ID) {
00570         return getTempSegment();
00571     } else {
00572         assert(segmentId == DEFAULT_DATA_SEGMENT_ID);
00573         if (pDataSegment) {
00574             return pDataSegment;
00575         } else {
00576             return getDataSegment();
00577         }
00578     }
00579 }
00580 
// Returns the segment factory created in init().
SharedSegmentFactory Database::getSegmentFactory() const
{
    return pSegmentFactory;
}
00585 
// Returns the logical transaction log; set by createTxnLog() and cleared
// by closeDevices().
SharedLogicalTxnLog Database::getTxnLog() const
{
    return pTxnLog;
}
00590 
// Creates the header segment at block 0 of the data device for a new
// database, allocates the two header pages, copies the in-memory header
// into each, and checkpoints the header segment.  Also resets the txn
// log's next transaction ID to FIRST_TXN_ID.
void Database::allocateHeader()
{
    LinearDeviceSegmentParams deviceParams;
    CompoundId::setDeviceId(deviceParams.firstBlockId,dataDeviceId);
    CompoundId::setBlockNum(deviceParams.firstBlockId,0);
    pHeaderSegment =
        pSegmentFactory->newLinearDeviceSegment(
            pCache,
            deviceParams);

    SegmentAccessor segmentAccessor(pHeaderSegment,pCache);
    DatabaseHeaderPageLock headerPageLock(segmentAccessor);

    PageId pageId;
    pTxnLog->setNextTxnId(FIRST_TXN_ID);
    // the first two allocations must land on the expected header page IDs
    pageId = headerPageLock.allocatePage();
    assert(pageId == headerPageId1);
    headerPageLock.getNodeForWrite() = header;
    pageId = headerPageLock.allocatePage();
    assert(pageId == headerPageId2);
    headerPageLock.getNodeForWrite() = header;
    // release the page lock before checkpointing the segment
    headerPageLock.unlock();
    pHeaderSegment->checkpoint();
}
00615 
00616 void Database::loadHeader(bool recovery)
00617 {
00618     LinearDeviceSegmentParams deviceParams;
00619     CompoundId::setDeviceId(deviceParams.firstBlockId,dataDeviceId);
00620     CompoundId::setBlockNum(deviceParams.firstBlockId,0);
00621     deviceParams.nPagesAllocated = 2;
00622     deviceParams.nPagesMax = 2;
00623     pHeaderSegment =
00624         pSegmentFactory->newLinearDeviceSegment(
00625             pCache,
00626             deviceParams);
00627 
00628     SegmentAccessor segmentAccessor(pHeaderSegment,pCache);
00629     DatabaseHeaderPageLock headerPageLock1(segmentAccessor);
00630     headerPageLock1.lockShared(headerPageId1);
00631     if (!headerPageLock1.checkMagicNumber()) {
00632         throw DataFormatExcn();
00633     }
00634 
00635     DatabaseHeaderPageLock headerPageLock2(segmentAccessor);
00636     headerPageLock2.lockShared(headerPageId2);
00637     if (!headerPageLock2.checkMagicNumber()) {
00638         throw DataFormatExcn();
00639     }
00640 
00641     DatabaseHeader const &header1 = headerPageLock1.getNodeForRead();
00642     DatabaseHeader const &header2 = headerPageLock2.getNodeForRead();
00643     if (recovery) {
00644         // TODO:  crc
00645         if (header2.versionNumber < header1.versionNumber) {
00646             header = header2;
00647         } else {
00648             header = header1;
00649         }
00650     } else {
00651         assert(header1.versionNumber == header2.versionNumber);
00652         // REVIEW:  should assert other fields equal as well?
00653         header = header1;
00654     }
00655     if (pTxnLog) {
00656         TxnId nextTxnId = header.txnLogCheckpointMemento.nextTxnId;
00657         pTxnLog->setNextTxnId(nextTxnId);
00658     }
00659 }
00660 
00661 TxnId Database::getLastCommittedTxnId()
00662 {
00663     return header.txnLogCheckpointMemento.nextTxnId - 1;
00664 }
00665 
// Performs a checkpoint of the given type.  Must not be called while
// recovery is required.
//
// CHECKPOINT_DISCARD:  marks the database as requiring recovery, records
// the online recovery page in the header, discards the data segment's
// state and (unless forceTxns is set) the txn log's, then returns without
// writing the header or updating the checkpoint counters.
//
// Any other type:  checkpoints the data segment and txn log, writes the
// header, deallocates the checkpointed portions of both logs, and finally
// bumps the checkpoint counters and wakes any waiters in
// requestCheckpoint().
void Database::checkpointImpl(CheckpointType checkpointType)
{
    assert(!isRecoveryRequired());

    if (checkpointType == CHECKPOINT_DISCARD) {
        recoveryRequired = true;
        // NOTE jvs 6-Mar-2006:  record this BEFORE anything else,
        // since pDataSegment->checkpoint(CHECKPOINT_DISCARD) will
        // destroy it
        header.shadowRecoveryPageId =
            pVersionedSegment->getOnlineRecoveryPageId();
        pDataSegment->checkpoint(checkpointType);
        if (!forceTxns) {
            // REVIEW jvs 8-Mar-2006:  I put in the forceTxns test
            // because when forceTxn is true, we actually use
            // CHECKPOINT_DISCARD for rollback, and there we DON'T
            // want to remove the other uncommitted transactions,
            // which is a side-effect of LogicalTxnLog::checkpoint(DISCARD).
            // Really, for forceTxns, we shouldn't be using
            // LogicalTxnLog at all.  And we should discriminate
            // between CHECKPOINT_DISCARD used to simulate a crash in
            // tests and CHECKPOINT_DISCARD used to implement rollback
            // as part of forceTxns.
            LogicalTxnLogCheckpointMemento crashMemento;
            pTxnLog->checkpoint(crashMemento,checkpointType);
        }
        return;
    }

    // the header records the versioned segment's version number, less one
    // for a fuzzy checkpoint
    header.versionNumber = pVersionedSegment->getVersionNumber();
    if (checkpointType == CHECKPOINT_FLUSH_FUZZY) {
        header.versionNumber--;
    }
    pDataSegment->checkpoint(checkpointType);
    header.shadowRecoveryPageId = pVersionedSegment->getRecoveryPageId();
    pTxnLog->checkpoint(
        header.txnLogCheckpointMemento,
        checkpointType);
    // the header is written before any log space is deallocated
    writeHeader();
    pVersionedSegment->deallocateCheckpointedLog(checkpointType);
    pTxnLog->deallocateCheckpointedLog(
        header.txnLogCheckpointMemento,checkpointType);

    // counters are updated under the mutex that requestCheckpoint() waits
    // on
    StrictMutexGuard mutexGuard(mutex);
    // TODO:  provide a counter which records the amount of data flushed by the
    // checkpoint
    ++nCheckpointsStat;
    ++nCheckpoints;
    condition.notify_all();
}
00716 
00717 void Database::requestCheckpoint(CheckpointType checkpointType,bool async)
00718 {
00719     StrictMutexGuard mutexGuard(mutex);
00720     uint nCheckpointsBefore = nCheckpoints;
00721     mutexGuard.unlock();
00722 
00723     if (forceTxns && (checkpointType == CHECKPOINT_FLUSH_FUZZY)) {
00724         // fuzzy checkpoints aren't meaningful in forceTxns mode,
00725         // so treat them as sharp
00726         checkpointType = CHECKPOINT_FLUSH_ALL;
00727     }
00728 
00729     pCheckpointThread->requestCheckpoint(checkpointType);
00730 
00731     if (async) {
00732         return;
00733     }
00734 
00735     mutexGuard.lock();
00736     while (nCheckpoints == nCheckpointsBefore) {
00737         condition.wait(mutexGuard);
00738     }
00739 }
00740 
// Copies the in-memory header into both header pages in turn, flushing
// each page through the cache before moving on to the next.
void Database::writeHeader()
{
    // TODO:  crc

    // NOTE:  use synchronous writes to guarantee that first write completes
    // before second one starts (otherwise a crash could leave both copies
    // corrupted)

    SegmentAccessor segmentAccessor(pHeaderSegment,pCache);
    DatabaseHeaderPageLock headerPageLock(segmentAccessor);

    // first copy
    headerPageLock.lockExclusive(headerPageId1);
    headerPageLock.getNodeForWrite() = header;
    pCache->flushPage(headerPageLock.getPage(),false);

    // second copy, using the same page lock object
    headerPageLock.lockExclusive(headerPageId2);
    headerPageLock.getNodeForWrite() = header;
    pCache->flushPage(headerPageLock.getPage(),false);
}
00760 
// Performs recovery without closing the database.  Requires forceTxns.
// Records the online recovery page in the header, prepares the versioned
// segment for online recovery, runs physical recovery with a flush-all
// checkpoint, and then takes a further flush-all checkpoint.
void Database::recoverOnline()
{
    // REVIEW jvs 8-Aug-2006:  This procedure has one questionable aspect,
    // which is that it leaves images of newly allocated pages in cache.
    // Updated pages are handled by recovery from the log, but newly allocated
    // pages are not.  They will be clean, so they shouldn't really cause any
    // trouble, but their presence could be, at a minimum, confusing.

    assert(forceTxns);
    header.shadowRecoveryPageId =
        pVersionedSegment->getOnlineRecoveryPageId();
    pVersionedSegment->prepareOnlineRecovery();
    // recoverPhysical() asserts this flag and clears it again
    recoveryRequired = true;

    // after recovery, flush recovered data pages; no need to discard them
    recoverPhysical(CHECKPOINT_FLUSH_ALL);

    // this will bump up version number to be used by further page writes
    checkpointImpl(CHECKPOINT_FLUSH_ALL);
}
00781 
// Runs full recovery:  physical recovery from the shadow log, then logical
// recovery by replaying the transaction log through the supplied
// participant factory.  Afterwards the devices are closed, the logs are
// deleted, and the database is reopened via openSegments().
//
// txnParticipantFactory:  passed to the LogicalRecoveryLog created here.
void Database::recover(
    LogicalTxnParticipantFactory &txnParticipantFactory)
{
    recoverPhysical(CHECKPOINT_FLUSH_AND_UNMAP);

    // REVIEW:  are shadows being correctly logged during recovery?  They have
    // to be, otherwise we can't re-recover after a failed recovery.

    // TODO:  encapsulate memento->PageId translation in txn somewhere
    // open the existing txn log, starting from the page of the log position
    // stored in the header's checkpoint memento
    SharedSegment pTxnLogSegment = createTxnLogSegment(
        openMode,
        CompoundId::getPageId(
            header.txnLogCheckpointMemento.logPosition.segByteId));

    SegmentAccessor logSegmentAccessor(pTxnLogSegment,pCache);

    SharedLogicalRecoveryLog pRecoveryLog =
        LogicalRecoveryLog::newLogicalRecoveryLog(
            txnParticipantFactory,
            logSegmentAccessor,
            header.onlineUuid,
            pSegmentFactory);
    // drop this function's accessor references before recovering
    logSegmentAccessor.reset();

    pRecoveryLog->recover(header.txnLogCheckpointMemento);
    // each object is asserted to be uniquely referenced before it is
    // released
    assert(pRecoveryLog.unique());
    pRecoveryLog.reset();
    assert(pTxnLogSegment.unique());
    pTxnLogSegment.reset();

    closeDevices();
    deleteLogs();
    FENNEL_TRACE(TRACE_INFO, "recovery completed");

    openSegments();
}
00818 
00819 void Database::recoverPhysical(CheckpointType checkpointType)
00820 {
00821     assert(!openMode.create);
00822     assert(isRecoveryRequired());
00823     recoveryRequired = false;
00824 
00825     FENNEL_TRACE(
00826         TRACE_INFO,
00827         "recovery beginning; page version = "
00828         << header.versionNumber);
00829 
00830     if (header.shadowRecoveryPageId != NULL_PAGE_ID) {
00831         pVersionedSegment->recover(
00832             pDataSegment,
00833             header.shadowRecoveryPageId,
00834             header.versionNumber);
00835         pDataSegment->checkpoint(checkpointType);
00836         header.versionNumber = pVersionedSegment->getVersionNumber();
00837         header.shadowRecoveryPageId = NULL_PAGE_ID;
00838         writeHeader();
00839         pVersionedSegment->deallocateCheckpointedLog(
00840             CHECKPOINT_FLUSH_AND_UNMAP);
00841     }
00842 }
00843 
00844 bool Database::isRecoveryRequired() const
00845 {
00846     return recoveryRequired;
00847 }
00848 
00849 void Database::readDeviceParams(
00850     std::string paramNamePrefix,
00851     DeviceMode deviceMode,
00852     LinearDeviceSegmentParams &deviceParams)
00853 {
00854     deviceParams.nPagesMin = configMap.getIntParam(
00855         paramNamePrefix + paramInitSizeSuffix);
00856     if (configMap.isParamSet(paramNamePrefix + paramIncSizeSuffix)) {
00857         deviceParams.nPagesIncrement = configMap.getIntParam(
00858             paramNamePrefix + paramIncSizeSuffix);
00859     }
00860     deviceParams.nPagesMax = configMap.getIntParam(
00861         paramNamePrefix + paramMaxSizeSuffix);
00862     if (!deviceParams.nPagesMax) {
00863         deviceParams.nPagesMax = MAXU;
00864     }
00865     if (deviceMode.create) {
00866         deviceParams.nPagesAllocated = 0;
00867     } else {
00868         deviceParams.nPagesAllocated = MAXU;
00869     }
00870 }
00871 
00872 StoredTypeDescriptorFactory const &Database::getTypeFactory() const
00873 {
00874     return typeFactory;
00875 }
00876 
00877 void Database::writeStats(StatsTarget &target)
00878 {
00879     pCache->writeStats(target);
00880 
00881     StrictMutexGuard mutexGuard(mutex);
00882     target.writeCounter(
00883         "DatabaseCheckpoints", nCheckpointsStat);
00884     target.writeCounter(
00885         "DatabaseCheckpointsSinceInit", nCheckpoints);
00886     if (pDataSegment) {
00887         target.writeCounter(
00888             "DatabasePagesAllocated", pDataSegment->getAllocatedSizeInPages());
00889         // +2 for the database header pages
00890         target.writeCounter(
00891             "DatabasePagesOccupiedHighWaterSinceInit",
00892             pDataSegment->getNumPagesOccupiedHighWater() + 2);
00893         target.writeCounter(
00894             "DatabasePagesExtendedSinceInit",
00895             pDataSegment->getNumPagesExtended());
00896     }
00897     if (pTempSegment) {
00898         target.writeCounter(
00899             "TempPagesAllocated", pTempSegment->getAllocatedSizeInPages());
00900         target.writeCounter(
00901             "TempPagesOccupiedHighWaterSinceInit",
00902             pTempSegment->getNumPagesOccupiedHighWater());
00903         target.writeCounter(
00904             "TempPagesExtendedSinceInit",
00905             pTempSegment->getNumPagesExtended());
00906     }
00907     nCheckpointsStat = 0;
00908 }
00909 
00910 bool Database::shouldForceTxns() const
00911 {
00912     return forceTxns;
00913 }
00914 
00915 bool Database::areSnapshotsEnabled() const
00916 {
00917     return (forceTxns && !disableSnapshots);
00918 }
00919 
00920 void Database::deallocateOldPages(TxnId oldestLabelCsn)
00921 {
00922     uint iSegAlloc = 0;
00923     ExtentNum extentNum = 0;
00924     // REVIEW zfong 3/12/07 - Determine a good value for numPages.  This
00925     // corresponds to the number of pages we will deallocate during a single
00926     // iteration.  We will be holding the checkpoint mutex for the duration
00927     // of an iteration so we don't want to make the value too big.  But at
00928     // the same time, we don't want to make it too small either, because it
00929     // would then require a large number of iterations to clean out all old
00930     // pages.
00931     uint numPages = 100;
00932 
00933     // Determine the oldest active txnId.
00934     TxnId oldestActiveTxnId = pTxnLog->getOldestActiveTxnId();
00935 
00936     // Take the minimum of the oldest active txnId and the
00937     // oldest active label + 1, if there are any active labels.
00938     // +1 because txns using that label will have ids bigger than that
00939     // label's csn.
00940     TxnId oldestTxnId;
00941     if (oldestLabelCsn == NULL_TXN_ID) {
00942         oldestTxnId = oldestActiveTxnId;
00943     } else {
00944         oldestTxnId = std::min(oldestActiveTxnId, oldestLabelCsn + 1);
00945     }
00946 
00947     // Gather a batch of old pageIds and then deallocate them.  After each
00948     // deallocation, issue a checkpoint to flush the modified allocation
00949     // node pages.  Continue this in a loop until we've read through all
00950     // allocation node pages.
00951 
00952     PageSet oldPageSet;
00953     VersionedRandomAllocationSegment *pVersionedRandomSegment =
00954         SegmentFactory::dynamicCast<VersionedRandomAllocationSegment *>(
00955             pDataSegment);
00956     bool morePages = true;
00957     do {
00958         morePages =
00959             pVersionedRandomSegment->getOldPageIds(
00960                 iSegAlloc,
00961                 extentNum,
00962                 oldestTxnId,
00963                 numPages,
00964                 oldPageSet);
00965 
00966         // Hold the checkpoint mutex while deallocating old pages, if there
00967         // are pages to deallocate.
00968         if (!oldPageSet.empty()) {
00969             SXMutexSharedGuard actionMutexGuard(
00970                 pCheckpointThread->getActionMutex());
00971             if (disableDeallocateOld) {
00972                 return;
00973             }
00974             pVersionedRandomSegment->deallocateOldPages(
00975                 oldPageSet,
00976                 oldestTxnId);
00977 
00978             actionMutexGuard.unlock();
00979             requestCheckpoint(CHECKPOINT_FLUSH_ALL, false);
00980             oldPageSet.clear();
00981         }
00982     } while (morePages);
00983 }
00984 
00985 TxnId Database::initiateBackup(
00986     const std::string &backupFilePathname,
00987     bool checkSpaceRequirements,
00988     FileSize spacePadding,
00989     TxnId lowerBoundCsn,
00990     const std::string &compressionProgram,
00991     FileSize &dataDeviceSize,
00992     const volatile bool &aborted)
00993 {
00994     FENNEL_TRACE(TRACE_FINE, "Started Fennel metadata backup");
00995 
00996     // Snapshots must be enabled
00997     if (!areSnapshotsEnabled()) {
00998         throw FennelExcn(
00999             FennelResource::instance().unsupportedOperation("System backup"));
01000     }
01001 
01002     // Hold the checkpoint mutex while backing up the header and allocation
01003     // node pages
01004     SXMutexSharedGuard actionMutexGuard(pCheckpointThread->getActionMutex());
01005 
01006     // Another backup should not have already been initiated
01007     assert(!disableDeallocateOld);
01008     assert(pBackupRestoreDevice == NULL);
01009 
01010     // The upper bound csn for this backup is the txnId of the last committed,
01011     // write txn.  Note that the next txnId to be assigned may be a larger
01012     // value because of read-only txns.  But we don't care about read-only
01013     // txns.  We just want the txnId that's in sync with what's reflected in
01014     // the header.
01015     TxnId upperBoundCsn = getLastCommittedTxnId();
01016 
01017     disableDeallocateOld = true;
01018     dataDeviceSize = pDataDevice->getSizeInBytes();
01019 
01020     // Use the prefetch setting to determine how many scratch pages to
01021     // allocate.  Note that these scratch pages are not being accounted
01022     // for in the resource governor and come from the reserve pool that
01023     // the resource governor currently sets aside.
01024     uint nScratchPages, rate;
01025     pCache->getPrefetchParams(nScratchPages, rate);
01026 
01027     scratchAccessor = pSegmentFactory->newScratchSegment(pCache);
01028     pBackupRestoreDevice =
01029         SegPageBackupRestoreDevice::newSegPageBackupRestoreDevice(
01030              backupFilePathname,
01031 #ifdef __MSVC__
01032              "wb",
01033 #else
01034              "w",
01035 #endif
01036              compressionProgram,
01037              nScratchPages,
01038              2,
01039              scratchAccessor,
01040              pCache->getDeviceAccessScheduler(*pDataDevice),
01041              pDataDevice);
01042     VersionedRandomAllocationSegment *pVRSegment =
01043         SegmentFactory::dynamicCast<VersionedRandomAllocationSegment *>(
01044             pDataSegment);
01045 
01046     try {
01047         pBackupRestoreDevice->backupPage(
01048             pHeaderSegment->translatePageId(headerPageId1));
01049         pBackupRestoreDevice->backupPage(
01050             pHeaderSegment->translatePageId(headerPageId2));
01051         // First wait for writes of the header pages to complete before backing
01052         // up the allocation node pages.
01053         pBackupRestoreDevice->waitForPendingWrites();
01054         BlockNum nDataPages =
01055             pVRSegment->backupAllocationNodes(
01056                 pBackupRestoreDevice,
01057                 checkSpaceRequirements,
01058                 lowerBoundCsn,
01059                 upperBoundCsn,
01060                 aborted);
01061 
01062         // Verify space if specified, now that we know how many data pages
01063         // will be backed up
01064         if (checkSpaceRequirements) {
01065             FileSize spaceAvailable;
01066             FileSystem::getDiskFreeSpace(
01067                 backupFilePathname.c_str(),
01068                 spaceAvailable);
01069             FileSize spaceRequired =
01070                 nDataPages * pDataSegment->getFullPageSize();
01071             // TODO zfong 9/16/08 - Revisit the compression factor after more
01072             // testing.  Set conservatively to 5, for now.
01073             if (compressionProgram.length() != 0) {
01074                 spaceRequired /= 5;
01075             }
01076             spaceRequired += spacePadding;
01077             if (spaceAvailable < spaceRequired) {
01078                 throw FennelExcn(FennelResource::instance().outOfBackupSpace());
01079             }
01080         }
01081     } catch (...) {
01082         cleanupBackupRestore(true);
01083         // abort exception takes precedence
01084         if (aborted) {
01085             FENNEL_TRACE(TRACE_FINE, "abort detected");
01086             throw AbortExcn();
01087         } else {
01088             throw;
01089         }
01090     }
01091 
01092     FENNEL_TRACE(TRACE_FINE, "Finished Fennel metadata backup");
01093     return upperBoundCsn;
01094 }
01095 
01096 void Database::completeBackup(
01097     TxnId lowerBoundCsn,
01098     TxnId upperBoundCsn,
01099     const volatile bool &aborted)
01100 {
01101     FENNEL_TRACE(TRACE_FINE, "Started Fennel data page backup");
01102     assert(disableDeallocateOld);
01103     assert(pBackupRestoreDevice != NULL);
01104 
01105     VersionedRandomAllocationSegment *pVRSegment =
01106         SegmentFactory::dynamicCast<VersionedRandomAllocationSegment *>(
01107             pDataSegment);
01108     try {
01109         pVRSegment->backupDataPages(
01110             pBackupRestoreDevice,
01111             lowerBoundCsn,
01112             upperBoundCsn,
01113             aborted);
01114         cleanupBackupRestore(true);
01115     } catch (...) {
01116         cleanupBackupRestore(true);
01117         // abort exception takes precedence
01118         if (aborted) {
01119             FENNEL_TRACE(TRACE_FINE, "abort detected");
01120             throw AbortExcn();
01121         } else {
01122             throw;
01123         }
01124     }
01125 
01126     FENNEL_TRACE(TRACE_FINE, "Finished Fennel data page backup");
01127 }
01128 
01129 void Database::abortBackup()
01130 {
01131     FENNEL_TRACE(TRACE_FINE, "Aborting Fennel backup");
01132     cleanupBackupRestore(true);
01133 }
01134 
01135 void Database::restoreFromBackup(
01136     const std::string &backupFilePathname,
01137     FileSize newSize,
01138     const std::string &compressionProgram,
01139     TxnId lowerBoundCsn,
01140     TxnId upperBoundCsn,
01141     const volatile bool &aborted)
01142 {
01143     FENNEL_TRACE(TRACE_FINE, "Started Fennel restore");
01144 
01145     // Snapshots must be enabled
01146     if (!areSnapshotsEnabled()) {
01147         throw FennelExcn(
01148             FennelResource::instance().unsupportedOperation("System restore"));
01149     }
01150 
01151     // Verify that the last committed csn in the database header matches the
01152     // lower bound csn.
01153     if (lowerBoundCsn != NULL_TXN_ID) {
01154         TxnId headerTxnId = getLastCommittedTxnId();
01155         if (headerTxnId != lowerBoundCsn) {
01156             throw FennelExcn(
01157                 FennelResource::instance().mismatchedRestore());
01158         }
01159     }
01160 
01161     pDataDevice->setSizeInBytes(newSize);
01162 
01163     VersionedRandomAllocationSegment *pVRSegment =
01164         SegmentFactory::dynamicCast<VersionedRandomAllocationSegment *>(
01165             pDataSegment);
01166 
01167     uint nScratchPages, rate;
01168     pCache->getPrefetchParams(nScratchPages, rate);
01169 
01170     scratchAccessor =
01171         pSegmentFactory->newScratchSegment(pCache);
01172     pBackupRestoreDevice =
01173         SegPageBackupRestoreDevice::newSegPageBackupRestoreDevice(
01174              backupFilePathname,
01175 #ifdef __MSVC__
01176              "rb",
01177 #else
01178              "r",
01179 #endif
01180              compressionProgram,
01181              nScratchPages,
01182              0,
01183              scratchAccessor,
01184              pCache->getDeviceAccessScheduler(*pDataDevice),
01185              pDataDevice);
01186 
01187     // Flush and unmap pages from the cache that will be restored, i.e., any
01188     // VersionedRandomAllocationSegment or database header pages, including
01189     // the header page just read above.  We need to unmap these pages to
01190     // ensure that the restore doesn't read old pages.
01191     MappedPageListenerPredicate dataPredicate(*pVRSegment);
01192     pCache->checkpointPages(dataPredicate, CHECKPOINT_FLUSH_AND_UNMAP);
01193     MappedPageListenerPredicate headerPredicate(*pHeaderSegment);
01194     pCache->checkpointPages(headerPredicate, CHECKPOINT_FLUSH_AND_UNMAP);
01195 
01196     try {
01197         // Restore the rest of the pages, including the database header pages
01198         pBackupRestoreDevice->restorePage(
01199             pHeaderSegment->translatePageId(headerPageId1));
01200         pBackupRestoreDevice->restorePage(
01201             pHeaderSegment->translatePageId(headerPageId2));
01202         pVRSegment->restoreFromBackup(
01203             pBackupRestoreDevice,
01204             lowerBoundCsn,
01205             upperBoundCsn,
01206             aborted);
01207         cleanupBackupRestore(false);
01208     } catch (...) {
01209         cleanupBackupRestore(false);
01210         // abort exception takes precedence
01211         if (aborted) {
01212             FENNEL_TRACE(TRACE_FINE, "abort detected");
01213             throw AbortExcn();
01214         } else {
01215             throw;
01216         }
01217     }
01218 
01219     // Reload the header pages so future checkpoints will flush the
01220     // restored data.  Issue a recover call on the versioned segment
01221     // to reset the version number and online uuid to the values that are
01222     // now in the header.
01223     loadHeader(false);
01224     pVersionedSegment->recover(
01225         pDataSegment,
01226         NULL_PAGE_ID,
01227         header.versionNumber,
01228         header.onlineUuid);
01229 
01230     FENNEL_TRACE(TRACE_FINE, "Finished Fennel restore");
01231 }
01232 
01233 void Database::cleanupBackupRestore(bool isBackup)
01234 {
01235     if (pBackupRestoreDevice) {
01236         pBackupRestoreDevice.reset();
01237     }
01238     if (scratchAccessor.pSegment) {
01239         scratchAccessor.reset();
01240     }
01241     if (isBackup) {
01242         SXMutexSharedGuard actionMutexGuard(
01243             pCheckpointThread->getActionMutex());
01244         disableDeallocateOld = false;
01245     }
01246 }
01247 
01248 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/db/Database.cpp#43 $");
01249 
01250 // End Database.cpp

Generated on Mon Jun 22 04:00:18 2009 for Fennel by  doxygen 1.5.1