FlatFileExecStreamImpl.h

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/flatfile/FlatFileExecStreamImpl.h#2 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 The Eigenbase Project
00005 // Copyright (C) 2009-2009 SQLstream, Inc.
00006 // Copyright (C) 2004-2009 LucidEra, Inc.
00007 // Portions Copyright (C) 2004-2009 John V. Sichi
00008 //
00009 // This program is free software; you can redistribute it and/or modify it
00010 // under the terms of the GNU General Public License as published by the Free
00011 // Software Foundation; either version 2 of the License, or (at your option)
00012 // any later version approved by The Eigenbase Project.
00013 //
00014 // This program is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 */
00023 
00024 #ifndef Fennel_FlatFileExecStreamImpl_Included
00025 #define Fennel_FlatFileExecStreamImpl_Included
00026 
00027 #include "fennel/flatfile/FlatFileBuffer.h"
00028 #include "fennel/flatfile/FlatFileExecStream.h"
00029 #include "fennel/flatfile/FlatFileParser.h"
00030 #include "fennel/segment/SegmentAccessor.h"
00031 #include "fennel/segment/SegPageLock.h"
00032 #include "fennel/tuple/TupleData.h"
00033 
00034 #include <boost/scoped_ptr.hpp>
00035 
00036 FENNEL_BEGIN_NAMESPACE
00037 
/**
 * Namespace-scope integer constant with value 255.
 *
 * NOTE(review): the name suggests an upper bound on the length of a flat-file
 * column name. The unit (bytes vs. characters) and the place where the limit
 * is enforced are not visible in this header -- confirm in the implementation.
 */
00041 const int FLAT_FILE_MAX_COLUMN_NAME_LEN = 255;
00042 
/**
 * Namespace-scope integer constant with value 255.
 *
 * NOTE(review): the name suggests an upper bound on the text length of a
 * value whose target type is not a character type. Nothing in this header
 * uses it, so the exact meaning must be confirmed in the implementation.
 */
00046 const int FLAT_FILE_MAX_NON_CHAR_VALUE_LEN = 255;
00047 
/**
 * Concrete class publicly derived from FlatFileExecStream.
 *
 * This header only declares the class: every member function is declared
 * here without a body, so the definitions live elsewhere. Everything before
 * the "public:" label is private (the default access of a class), leaving
 * prepare(), getResourceRequirements(), open() and execute() as the only
 * public members.
 *
 * No constructor is declared and no data member has an in-class initializer.
 * NOTE(review): the bool, int, uint and pointer members therefore presumably
 * get their values in prepare() and/or open() -- confirm in the .cpp.
 *
 * NOTE(review): the listing numbers skip in several places (for example
 * 00096 -> 00100), which suggests that documentation comments of the
 * underlying header were omitted from this listing. The notes added here are
 * reviewer annotations, not the original documentation.
 */
00054 class FENNEL_FLATFILE_EXPORT FlatFileExecStreamImpl
00055     : public FlatFileExecStream
00056 {
00057     // max length of text for a row when signalling an error
    // Declared without an initializer, so the value is supplied by an
    // out-of-line definition that is not part of this header.
00058     static const uint MAX_ROW_ERROR_TEXT_WIDTH;
00059 
00060     // parameters
    // NOTE(review): presumably copied from FlatFileExecStreamParams in
    // prepare(); the params type is not shown here -- confirm.
00061     std::string dataFilePath;
00062     bool header;
00063     bool lenient;
00064     bool trim;
00065     bool mapped;
00066     std::vector<std::string> columnNames;
00067 
    // Working state: row descriptor, buffer, parser, the most recent parse
    // result, and the tuples/descriptors used while producing rows.
    // NOTE(review): the roles above are read off the type and member names
    // only -- confirm against the implementation.
00068     FlatFileRowDescriptor rowDesc;
00069     SharedFlatFileBuffer pBuffer;
00070     PBuffer pBufferStorage;
    // Raw, non-const char pointer. NOTE(review): presumably a read cursor
    // into the buffer; what it points to and who owns that memory cannot be
    // determined from this header.
00071     char *next;
00072     SharedFlatFileParser pParser;
00073     FlatFileRowParseResult lastResult;
00074     TupleDescriptor textDesc;
00075     TupleData textTuple, dataTuple;
00076     bool isRowPending;
00077 
00078     // for sampling/describe mode
00079     FlatFileMode mode;
00080     int numRowsScan;
00081     bool done;
00082     VectorOfUint fieldSizes;
00083     std::string describeResult;
00084 
    // NOTE(review): a page lock and a segment accessor; judging by the names
    // they provide scratch storage for the buffer -- confirm.
00085     SegPageLock bufferLock;
00086     SegmentAccessor scratchAccessor;
00087 
00088     // error handling
00089     uint nRowsOutput, nRowErrors;
    // Note: the two-argument logError() declared further down has a parameter
    // that is also called "reason"; inside that function the parameter hides
    // this member.
00090     std::string reason;
00091     TupleDescriptor errorDesc;
00092     TupleData errorTuple;
00093 
00094     // implement ExecStream
    // Declared in the private section.
00095     virtual void closeImpl();
00096 
    /**
     * Private helper taking no arguments and returning nothing.
     * NOTE(review): presumably frees the buffer/lock state held by this
     * object; which resources, and who calls it, is not visible here.
     */
00100     void releaseResources();
00101 
    /**
     * Private helper mapping a name to an unsigned value.
     *
     * @param name name to look up, passed by const reference
     * @return NOTE(review): presumably the position of the matching entry;
     *         the result for an unknown name is not visible here -- confirm.
     */
00107     uint findField(const std::string &name);
00108 
    /**
     * Private helper producing a FlatFileRowDescriptor from a tuple
     * descriptor.
     *
     * @param tupleDesc input descriptor, passed by const reference
     * @return a FlatFileRowDescriptor, returned by value
     */
00122     FlatFileRowDescriptor readTupleDescriptor(
00123         const TupleDescriptor &tupleDesc);
00124 
    /**
     * Private helper taking a parse result and a tuple.
     *
     * @param result parse result, passed by non-const reference (may be
     *        modified by the callee)
     * @param tuple tuple, passed by non-const reference (may be modified by
     *        the callee)
     */
00134     void handleTuple(
00135         FlatFileRowParseResult &result,
00136         TupleData &tuple);
00137 
    /**
     * Private helper for describe mode (judging by the name -- confirm).
     *
     * @param tupleData tuple passed by non-const reference; presumably the
     *        output location for the description
     */
00142     void describeStream(TupleData &tupleData);
00143 
    /**
     * Error-logging overload that takes only the parse result.
     *
     * @param result parse result of the offending row, by const reference
     */
00148     void logError(const FlatFileRowParseResult &result);
00149 
    /**
     * Error-logging overload that also takes an explicit reason text.
     *
     * @param reason reason text. It is passed by value, so each call copies
     *        the string; the top-level const does not change the function's
     *        signature. Any change here must be mirrored in the out-of-line
     *        definition.
     * @param result parse result of the offending row, by const reference
     */
00153     void logError(
00154         const std::string reason,
00155         const FlatFileRowParseResult &result);
00156 
    /**
     * Private helper taking no arguments and returning nothing.
     * NOTE(review): presumably validates the configured row delimiter; what
     * is checked and how a failure is reported is not visible here.
     */
00160     void checkRowDelimiter();
00161 
00162 public:
00163     // implement ExecStream
    // The four virtual methods below are the only public members declared by
    // this class.
00164     virtual void prepare(FlatFileExecStreamParams const &params);
00165     virtual void getResourceRequirements(
00166         ExecStreamResourceQuantity &minQuantity,
00167         ExecStreamResourceQuantity &optQuantity);
00168     virtual void open(bool restart);
00169     virtual ExecStreamResult execute(ExecStreamQuantum const &quantum);
00170 };
00171 
00172 FENNEL_END_NAMESPACE
00173 
00174 #endif
00175 
00176 // End FlatFileExecStreamImpl.h

Generated on Mon Jun 22 04:00:19 2009 for Fennel by  doxygen 1.5.1