00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef Fennel_FlatFileParser_Included
00024 #define Fennel_FlatFileParser_Included
00025
00026
00027 #include <vector>
00028
00029 FENNEL_BEGIN_NAMESPACE
00030
// Forward declaration: lets the shared-pointer alias below be declared
// before the full FlatFileParser class definition later in this header.
class FlatFileParser;
// Reference-counted handle to a FlatFileParser.
typedef boost::shared_ptr<FlatFileParser> SharedFlatFileParser;
00033
00037 class FENNEL_FLATFILE_EXPORT FlatFileColumnParseResult
00038 {
00039 public:
00041 enum DelimiterType {
00043 NO_DELIM = 0,
00045 FIELD_DELIM,
00047 ROW_DELIM,
00049 MAX_LENGTH
00050 };
00051
00055 DelimiterType type;
00056
00060 uint size;
00061
00065 char *next;
00066
00071 void setResult(DelimiterType type, char *buffer, uint size);
00072 };
00073
00077 class FENNEL_FLATFILE_EXPORT FlatFileRowParseResult
00078 {
00079 public:
00081 enum RowStatus {
00085 NO_STATUS = 0,
00090 INCOMPLETE_COLUMN,
00094 ROW_TOO_LARGE,
00099 NO_COLUMN_DELIM,
00103 TOO_FEW_COLUMNS,
00107 TOO_MANY_COLUMNS
00108 };
00109
00110 explicit FlatFileRowParseResult();
00111 void reset();
00112
00116 RowStatus status;
00117
00121 VectorOfUint offsets;
00122
00126 VectorOfUint sizes;
00127
00131 VectorOfUint strippedSizes;
00132
00136 char *current;
00137
00142 char *next;
00143
00147 uint nRowDelimsRead;
00148
00152 uint getReadCount()
00153 {
00154 return offsets.size();
00155 }
00156
00163 char *getColumn(uint iColumn)
00164 {
00165 if (sizes[iColumn] == 0) {
00166 return NULL;
00167 }
00168 return current + offsets[iColumn];
00169 }
00170
00174 uint getRawColumnSize(uint iColumn)
00175 {
00176 return sizes[iColumn];
00177 }
00178
00182 uint getColumnSize(uint iColumn)
00183 {
00184 return strippedSizes[iColumn];
00185 }
00186
00190 void clear()
00191 {
00192 offsets.clear();
00193 sizes.clear();
00194 }
00195
00199 void resize(uint nColumns)
00200 {
00201 offsets.resize(nColumns);
00202 sizes.resize(nColumns);
00203 }
00204
00208 void setColumn(uint iColumn, uint offset, uint size)
00209 {
00210 offsets[iColumn] = offset;
00211 sizes[iColumn] = size;
00212 }
00213
00217 void setNull(uint iColumn)
00218 {
00219 setColumn(iColumn, 0, 0);
00220 }
00221
00225 void addColumn(uint offset, uint size)
00226 {
00227 offsets.push_back(offset);
00228 sizes.push_back(size);
00229 }
00230 };
00231
00235 class FENNEL_FLATFILE_EXPORT FlatFileColumnDescriptor
00236 {
00237 public:
00238 uint maxLength;
00239
00240 #ifdef __MSVC__
00241 explicit FlatFileColumnDescriptor()
00242 {
00243 maxLength = 0;
00244 }
00245 #endif
00246
00247 explicit FlatFileColumnDescriptor(uint maxLengthInit)
00248 {
00249 maxLength = maxLengthInit;
00250 }
00251 };
00252
00261 class FENNEL_FLATFILE_EXPORT FlatFileRowDescriptor
00262 : public std::vector<FlatFileColumnDescriptor>
00263 {
00264 bool bounded;
00265 bool lenient;
00266
00267 VectorOfUint columnMap;
00268
00269 public:
00273 static const int MAX_COLUMNS = 1024;
00274
00279 static const int MAX_COLUMN_LENGTH = 65535;
00280
00284 FlatFileRowDescriptor();
00285
00290 void setUnbounded();
00291
00296 bool isBounded() const;
00297
00304 void setMap(VectorOfUint map)
00305 {
00306 columnMap = map;
00307 }
00308
00312 bool isMapped() const
00313 {
00314 return columnMap.size() > 0;
00315 }
00316
00321 int getMap(uint iSource) const
00322 {
00323 if (iSource >= columnMap.size()) {
00324 return -1;
00325 }
00326 return columnMap[iSource];
00327 }
00328
00329 void setLenient(bool lenientIn)
00330 {
00331 lenient = lenientIn;
00332 }
00333
00334 bool isLenient() const
00335 {
00336 return lenient;
00337 }
00338
00345 uint getMaxColumns() const
00346 {
00347 if (!bounded) {
00348 return MAX_COLUMNS;
00349 } else if (isMapped()) {
00350 return columnMap.size();
00351 } else {
00352 return size();
00353 }
00354 }
00355
00361 uint getMaxLength(uint i) const
00362 {
00363 uint realIndex = 0;
00364 if (!bounded) {
00365 return MAX_COLUMN_LENGTH;
00366 } else if (isMapped()) {
00367 realIndex = getMap(i);
00368 } else {
00369 realIndex = i;
00370 }
00371 if (realIndex < 0 || realIndex >= size()) {
00372 return MAX_COLUMN_LENGTH;
00373 } else {
00374 return (*this)[realIndex].maxLength;
00375 }
00376 }
00377 };
00378
00389 class FENNEL_FLATFILE_EXPORT FlatFileParser
00390 {
00391 char fieldDelim;
00392 char rowDelim;
00393 char quote;
00394 char escape;
00395 bool doTrim;
00396
00400 bool fixed;
00401
00417 const char *scanRowEnd(
00418 const char *buffer,
00419 int size,
00420 bool rowDelim,
00421 FlatFileRowParseResult &result);
00422
00435 const char *scanRowDelim(
00436 const char *buffer,
00437 int size,
00438 bool search);
00439
00445 bool isRowDelim(char c);
00446
00447 public:
00463 FlatFileParser(
00464 const char fieldDelim,
00465 const char rowDelim,
00466 const char quote,
00467 const char escape,
00468 bool doTrim = false);
00469
00498 void scanRow(
00499 const char *buffer,
00500 int size,
00501 const FlatFileRowDescriptor &columns,
00502 FlatFileRowParseResult &result);
00503
00518 void scanColumn(
00519 const char *buffer,
00520 uint size,
00521 uint maxLength,
00522 FlatFileColumnParseResult &result);
00523
00524
00531 void scanFixedColumn(
00532 const char *buffer,
00533 uint size,
00534 uint maxLength,
00535 FlatFileColumnParseResult &result);
00536
00545 void stripQuoting(
00546 FlatFileRowParseResult &rowResult,
00547 bool trim);
00548
00571 uint stripQuoting(char *buffer, uint size, bool untrimmed);
00572
00582 uint trim(char *buffer, uint size);
00583 };
00584
00585 FENNEL_END_NAMESPACE
00586
00587 #endif
00588
00589