00001 /* 00002 // $Id: //open/dev/fennel/flatfile/FlatFileExecStreamImpl.h#2 $ 00003 // Fennel is a library of data storage and processing components. 00004 // Copyright (C) 2005-2009 The Eigenbase Project 00005 // Copyright (C) 2009-2009 SQLstream, Inc. 00006 // Copyright (C) 2004-2009 LucidEra, Inc. 00007 // Portions Copyright (C) 2004-2009 John V. Sichi 00008 // 00009 // This program is free software; you can redistribute it and/or modify it 00010 // under the terms of the GNU General Public License as published by the Free 00011 // Software Foundation; either version 2 of the License, or (at your option) 00012 // any later version approved by The Eigenbase Project. 00013 // 00014 // This program is distributed in the hope that it will be useful, 00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 // GNU General Public License for more details. 00018 // 00019 // You should have received a copy of the GNU General Public License 00020 // along with this program; if not, write to the Free Software 00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00022 */ 00023 00024 #ifndef Fennel_FlatFileExecStreamImpl_Included 00025 #define Fennel_FlatFileExecStreamImpl_Included 00026 00027 #include "fennel/flatfile/FlatFileBuffer.h" 00028 #include "fennel/flatfile/FlatFileExecStream.h" 00029 #include "fennel/flatfile/FlatFileParser.h" 00030 #include "fennel/segment/SegmentAccessor.h" 00031 #include "fennel/segment/SegPageLock.h" 00032 #include "fennel/tuple/TupleData.h" 00033 00034 #include <boost/scoped_ptr.hpp> 00035 00036 FENNEL_BEGIN_NAMESPACE 00037 00041 const int FLAT_FILE_MAX_COLUMN_NAME_LEN = 255; 00042 00046 const int FLAT_FILE_MAX_NON_CHAR_VALUE_LEN = 255; 00047 00054 class FENNEL_FLATFILE_EXPORT FlatFileExecStreamImpl 00055 : public FlatFileExecStream 00056 { 00057 // max length of text for a row when signalling an error 00058 static const uint MAX_ROW_ERROR_TEXT_WIDTH; 00059 00060 // parameters 00061 std::string dataFilePath; 00062 bool header; 00063 bool lenient; 00064 bool trim; 00065 bool mapped; 00066 std::vector<std::string> columnNames; 00067 00068 FlatFileRowDescriptor rowDesc; 00069 SharedFlatFileBuffer pBuffer; 00070 PBuffer pBufferStorage; 00071 char *next; 00072 SharedFlatFileParser pParser; 00073 FlatFileRowParseResult lastResult; 00074 TupleDescriptor textDesc; 00075 TupleData textTuple, dataTuple; 00076 bool isRowPending; 00077 00078 // for sampling/describe mode 00079 FlatFileMode mode; 00080 int numRowsScan; 00081 bool done; 00082 VectorOfUint fieldSizes; 00083 std::string describeResult; 00084 00085 SegPageLock bufferLock; 00086 SegmentAccessor scratchAccessor; 00087 00088 // error handling 00089 uint nRowsOutput, nRowErrors; 00090 std::string reason; 00091 TupleDescriptor errorDesc; 00092 TupleData errorTuple; 00093 00094 // implement ExecStream 00095 virtual void closeImpl(); 00096 00100 void releaseResources(); 00101 00107 uint findField(const std::string &name); 00108 00122 FlatFileRowDescriptor readTupleDescriptor( 00123 const TupleDescriptor &tupleDesc); 00124 00134 void handleTuple( 00135 FlatFileRowParseResult &result, 00136 TupleData &tuple); 00137 00142 void describeStream(TupleData &tupleData); 00143 00148 void logError(const FlatFileRowParseResult &result); 00149 00153 void logError( 00154 const std::string reason, 00155 const FlatFileRowParseResult &result); 00156 00160 void checkRowDelimiter(); 00161 00162 public: 00163 // implement ExecStream 00164 virtual void prepare(FlatFileExecStreamParams const ¶ms); 00165 virtual void getResourceRequirements( 00166 ExecStreamResourceQuantity &minQuantity, 00167 ExecStreamResourceQuantity &optQuantity); 00168 virtual void open(bool restart); 00169 virtual ExecStreamResult execute(ExecStreamQuantum const &quantum); 00170 }; 00171 00172 FENNEL_END_NAMESPACE 00173 00174 #endif 00175 00176 // End FlatFileExecStreamImpl.h