00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef Fennel_SqlRegExp_Included
00028 #define Fennel_SqlRegExp_Included
00029
00030 #include <string>
00031 #include <boost/regex.hpp>
00032
00033 #ifdef HAVE_ICU
00034 #include <unicode/ustring.h>
00035 #endif
00036
00037 FENNEL_BEGIN_NAMESPACE
00038
00039 #if !(defined LITTLEENDIAN || defined BIGENDIAN)
00040 #error "endian not defined"
00041 #endif
00042
00055
00056
00057
00058
00059
00060
00061
00062
00063
//! Translates a SQL LIKE pattern into a regular expression string.
//!
//! Only the fixed-width single-byte case (CodeUnitBytes == 1 and
//! MaxCodeUnitsPerCodePoint == 1) is implemented; every other
//! instantiation throws std::logic_error.
//!
//! Translation rules:
//! - '_' becomes '.', '%' becomes ".*"
//! - an escaped '_', '%' or escape character becomes that literal character
//! - every other regex metacharacter is prefixed with a backslash
//!
//! An empty pattern (patternLenBytes == 0) is not translated; expPat is set
//! to the placeholder "UNUSED" and the escape argument is not validated.
//!
//! @param pattern LIKE pattern bytes; length is given explicitly, so the
//! buffer does not have to be NUL terminated
//! @param patternLenBytes length of pattern in bytes
//! @param escape pointer to the escape character, or NULL when there is none
//! @param escapeLenBytes length of escape in bytes; must be 1, or 0 together
//! with a NULL escape
//! @param expPat receives the resulting regular expression
//!
//! @throws char const* "22019" (SQLSTATE: invalid escape character) when the
//! escape argument is not exactly one byte, or is NULL with a length of 1
//! @throws char const* "22025" (SQLSTATE: invalid escape sequence) when the
//! escape character is not followed by '_', '%' or itself
//! @throws std::logic_error for unsupported encodings
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlLikePrep(
    char const * const pattern,
    int patternLenBytes,
    char const * const escape,
    int escapeLenBytes,
    std::string& expPat)
{
    if (CodeUnitBytes != MaxCodeUnitsPerCodePoint) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    if (patternLenBytes == 0) {
        // Nothing to translate; hand back a placeholder expression.
        expPat.assign("UNUSED");
        return;
    }

    // SQL wildcards plus every character that is special to the regex
    // engine. The escape character (if any) is appended below so that a
    // single find_first_of() scan visits everything that needs attention.
    std::string special("_%.|*?+(){}[]^$\\");
    bool escapeIsRegexpSpecial = false;
    char escapeChar = 0;

    if (escapeLenBytes == 1) {
        if (!escape) {
            // A length of 1 with no data cannot be dereferenced; treat it
            // as an invalid escape character rather than crashing.
            throw "22019";
        }
        escapeChar = *escape;
        // '_' and '%' are SQL wildcards, not regex metacharacters, so a
        // doubled '_' or '%' escape never needs a regex backslash.
        escapeIsRegexpSpecial =
            escapeChar != '_'
            && escapeChar != '%'
            && special.find(escapeChar) != std::string::npos;
        special.append(1, escapeChar);
    } else if (escape || escapeLenBytes) {
        // Anything other than "NULL pointer and zero length" is invalid.
        throw "22019";
    }

    expPat.assign(pattern, patternLenBytes);

    size_t pos = 0;
    while ((pos = expPat.find_first_of(special, pos)) != std::string::npos) {
        char const current = expPat[pos];

        if (current == escapeChar) {
            if (pos + 1 >= expPat.size()) {
                // Escape character at the very end of the pattern.
                throw "22025";
            }
            char const next = expPat[pos + 1];
            if (next != '_' && next != '%' && next != escapeChar) {
                throw "22025";
            }
            if (escapeIsRegexpSpecial && next == escapeChar) {
                // Doubled escape that is itself a regex metacharacter:
                // turn the first one into a regex backslash.
                expPat[pos] = '\\';
                pos += 2;
            } else {
                // Drop the escape and step over the now-literal character.
                expPat.erase(pos, 1);
                pos++;
            }
        } else if (current == '_') {
            expPat.replace(pos, 1, ".");
            pos++;
        } else if (current == '%') {
            expPat.replace(pos, 1, ".*");
            pos += 2;
        } else {
            // Regex metacharacter (including a backslash) that must match
            // literally: prefix it with a backslash and skip both.
            expPat.insert(pos, "\\", 1);
            pos += 2;
        }
    }
}
00185
//! Validates the escape argument of a SIMILAR pattern and records the
//! escape character.
//!
//! Only acts when CodeUnitBytes == 1 and MaxCodeUnitsPerCodePoint == 1; for
//! any other instantiation the function returns without touching its
//! outputs.
//!
//! @param escape pointer to the escape character, or NULL when there is none
//! @param escapeLenBytes length of escape in bytes; must be 1, or 0 together
//! with a NULL escape
//! @param escapeChar receives the escape character, or 0 when there is none
//! @param expPat the pattern text, only inspected for validation
//! @param sqlSpecial set of characters the rewrite pass must visit; the
//! escape character is appended to it when one is supplied
//!
//! @throws char const* "22019" (SQLSTATE: invalid escape character) when the
//! escape argument is not exactly one byte, or is NULL with a length of 1
//! @throws char const* "2200C" (SQLSTATE: invalid use of escape character)
//! when the escape character is one of "[]()|^-+*_%?{}" and some occurrence
//! in the pattern is not followed by a character from that same set
//! @throws char const* "2200B" (SQLSTATE: escape character conflict) when
//! the escape character is ':' and the pattern contains "[:" or ":]"
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlSimilarPrepEscapeProcessing(
    char const * const escape,
    int escapeLenBytes,
    char& escapeChar,
    std::string const & expPat,
    std::string& sqlSpecial)
{
    if (CodeUnitBytes != MaxCodeUnitsPerCodePoint || CodeUnitBytes != 1) {
        return;
    }

    if (escapeLenBytes != 1) {
        if (escape || escapeLenBytes) {
            // Anything other than "NULL pointer and zero length" is invalid.
            throw "22019";
        }
        escapeChar = 0;
        return;
    }

    if (!escape) {
        // A length of 1 with no data cannot be dereferenced.
        throw "22019";
    }
    escapeChar = *escape;
    sqlSpecial.append(1, escapeChar);

    // std::string::find is used instead of strchr because strchr also
    // matches the terminating NUL, which would make a NUL byte in the
    // pattern (or a NUL escape character) look like a special character.
    std::string const SqlSimilarPrepGeneralRule3b("[]()|^-+*_%?{}");

    if (SqlSimilarPrepGeneralRule3b.find(escapeChar) != std::string::npos) {
        // The escape character is itself special: every occurrence must
        // be followed by another special character.
        size_t pos = 0;
        while ((pos = expPat.find(escapeChar, pos)) != std::string::npos) {
            if (pos + 1 >= expPat.size()
                || SqlSimilarPrepGeneralRule3b.find(expPat[pos + 1])
                    == std::string::npos)
            {
                throw "2200C";
            }
            // Step over the escape and the character it escapes.
            pos += 2;
        }
    }

    if (escapeChar == ':'
        && (expPat.find("[:") != std::string::npos
            || expPat.find(":]") != std::string::npos))
    {
        // ':' would be ambiguous with the "[:NAME:]" character set syntax.
        throw "2200B";
    }
}
00250
00251
00252
00253
00254
00255
//! Handles the text that follows an opening '[' in a SIMILAR pattern.
//!
//! Only acts when CodeUnitBytes == 1 and MaxCodeUnitsPerCodePoint == 1.
//!
//! Two forms are recognised at position pos:
//! - A named character set, "[:NAME:]" or "^[:NAME:]". The name is looked
//!   up in a fixed table and replaced in expPat by its regex equivalent;
//!   pos is advanced past the replacement.
//! - Anything else is treated as an ordinary character enumeration. It is
//!   only validated: scanning forward to the closing ']', any unescaped
//!   character from "[]()|+*_%?{}" other than that ']' is an error.
//!   expPat and pos are left unchanged.
//!
//! @param expPat pattern being rewritten in place
//! @param pos index of the first character after the opening '['; updated
//! only for the named character set form
//! @param SqlSimilarPrepSyntaxRule6 not used by this function
//! @param escapeChar escape character, or 0 when the pattern has none
//!
//! @throws char const* "2201B" (SQLSTATE: invalid regular expression) for an
//! unescaped special character inside an enumeration or an unknown
//! character set name
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlSimilarPrepRewriteCharEnumeration(
    std::string& expPat,
    size_t& pos,
    char const * const SqlSimilarPrepSyntaxRule6,
    char escapeChar)
{
    if (CodeUnitBytes != MaxCodeUnitsPerCodePoint || CodeUnitBytes != 1) {
        return;
    }

    if (!expPat.compare(pos, 3, "^[:")) {
        // Negated named set: step over '^' so pos is on "[:".
        pos++;
    } else if (expPat.compare(pos, 2, "[:")) {
        // Ordinary enumeration: validate only.
        // '^' and '-' are deliberately absent from this set.
        std::string syntaxRule6ForCharEnum("[]()|+*_%?{}");
        if (escapeChar != 0) {
            // append(count, ch): the count comes first. With the arguments
            // swapped the escape character was never added to the set, and
            // a negative char value turned into an enormous count.
            // A zero escapeChar means "no escape" and is not added, so NUL
            // bytes in the pattern are not mistaken for escapes.
            syntaxRule6ForCharEnum.append(1, escapeChar);
        }

        size_t pos2 = pos;
        while ((pos2 = expPat.find_first_of(syntaxRule6ForCharEnum, pos2))
               != std::string::npos)
        {
            if (expPat[pos2] == escapeChar) {
                // Skip the escape and the character it escapes.
                pos2 += 2;
            } else if (expPat[pos2] == ']') {
                // End of the enumeration.
                break;
            } else {
                // Unescaped special character inside the enumeration.
                throw "2201B";
            }
        }
        return;
    }

    // Named character sets: { SQL spelling, regex replacement }.
    // The table is terminated by an entry with an empty SQL spelling.
    char const * const regCharSetIdent[][2] = {
        { "[:ALPHA:]", "[:alpha:]" },
        { "[:alpha:]", "[:alpha:]" },
        { "[:UPPER:]", "[:upper:]" },
        { "[:upper:]", "[:upper:]" },
        { "[:LOWER:]", "[:lower:]" },
        { "[:lower:]", "[:lower:]" },
        { "[:DIGIT:]", "[:digit:]" },
        { "[:digit:]", "[:digit:]" },
        { "[:SPACE:]", " " },
        { "[:space:]", " " },
        { "[:WHITESPACE:]", "\x20\xa0\x09\x0a\x0b\x0c\x0d\x85" },
        { "[:whitespace:]", "\x20\xa0\x09\x0a\x0b\x0c\x0d\x85" },
        { "[:ALNUM:]", "[:alnum:]" },
        { "[:alnum:]", "[:alnum:]" },
        { "", "" },
    };

    for (size_t i = 0; *regCharSetIdent[i][0]; i++) {
        char const * const sqlName = regCharSetIdent[i][0];
        char const * const regexName = regCharSetIdent[i][1];
        size_t const len = strlen(sqlName);
        if (!expPat.compare(pos, len, sqlName)) {
            expPat.replace(pos, len, regexName);
            pos += strlen(regexName);
            return;
        }
    }

    // "[:" was seen but the name is not in the table.
    throw "2201B";
}
00372
00373
00374
00375
00376 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00377 void
00378 SqlSimilarPrepReWrite(
00379 char escapeChar,
00380 std::string& expPat,
00381 std::string& sqlSpecial)
00382 {
00383 if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00384 CodeUnitBytes == 1) {
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394 char const * const SqlSimilarPrepSyntaxRule6 = "[]()|^-+*_%?{}";
00395
00396 char const * const BoostRegExEscapeChar = "\\";
00397
00398
00399
00400
00401
00402
00403 size_t pos = 0;
00404 bool characterEnumeration = false;
00405 while ((pos = expPat.find_first_of(sqlSpecial, pos)) !=
00406 std::string::npos)
00407 {
00408 if (expPat[pos] == escapeChar) {
00409 if (pos + 1 >= expPat.size()) {
00410
00411
00412
00413 throw "2201B";
00414 }
00415 if (strchr(SqlSimilarPrepSyntaxRule6, expPat[pos + 1])) {
00416
00417
00418
00419 expPat.replace(pos, 1, BoostRegExEscapeChar);
00420
00421 pos += 2;
00422 } else if (expPat[pos + 1] == escapeChar) {
00423
00424
00425
00426 expPat.erase(pos, 1);
00427
00428 pos++;
00429 } else {
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441 throw "2201B";
00442 }
00443 } else {
00444 switch (expPat[pos]) {
00445 case '[':
00446
00447
00448
00449
00450 characterEnumeration = true;
00451 pos++;
00452 SqlSimilarPrepRewriteCharEnumeration
00453 <CodeUnitBytes, MaxCodeUnitsPerCodePoint>
00454 (expPat, pos, SqlSimilarPrepSyntaxRule6, escapeChar);
00455 break;
00456 case ']':
00457 if (!characterEnumeration) {
00458
00459
00460
00461 throw "2201B";
00462 }
00463 characterEnumeration = false;
00464 pos++;
00465 break;
00466 case '_':
00467 expPat.replace(pos, 1, ".");
00468 pos++;
00469 break;
00470 case '%':
00471 expPat.replace(pos, 1, ".*");
00472 pos += 2;
00473 break;
00474 case '\\':
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489 expPat.replace(pos, 1, "\\\\");
00490 pos += 2;
00491 break;
00492 case '.':
00493
00494 expPat.replace(pos, 1, "\\.");
00495 pos += 2;
00496 break;
00497 case '$':
00498
00499 expPat.replace(pos, 1, "\\$");
00500 pos += 2;
00501 break;
00502 default:
00503 throw std::logic_error("SqlSimilarPrep:escapeSwitch");
00504 }
00505 }
00506 }
00507
00508 if (characterEnumeration) {
00509
00510
00511
00512 throw "2201B";
00513 }
00514 }
00515 }
00516
00536 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00537 void
00538 SqlSimilarPrep(
00539 char const * const pattern,
00540 int patternLenBytes,
00541 char const * const escape,
00542 int escapeLenBytes,
00543 std::string& expPat)
00544 {
00545 if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00546 CodeUnitBytes == 1) {
00547
00548
00549 if (patternLenBytes == 0) {
00550
00551
00552
00553
00554 expPat.assign("UNUSED");
00555 return;
00556 }
00557
00558 expPat.assign(pattern, patternLenBytes);
00559
00560
00561
00562 std::string sqlSpecial("\\.$_%[]");
00563 char escapeChar;
00564
00565 SqlSimilarPrepEscapeProcessing<CodeUnitBytes, MaxCodeUnitsPerCodePoint>(
00566 escape,
00567 escapeLenBytes,
00568 escapeChar,
00569 expPat,
00570 sqlSpecial);
00571
00572 SqlSimilarPrepReWrite<CodeUnitBytes, MaxCodeUnitsPerCodePoint>(
00573 escapeChar,
00574 expPat,
00575 sqlSpecial);
00576
00577 } else if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00578 CodeUnitBytes == 2) {
00579
00580
00581
00582
00583
00584 throw std::logic_error("no UCS2");
00585 } else if (CodeUnitBytes == MaxCodeUnitsPerCodePoint) {
00586 throw std::logic_error("no such encoding");
00587 } else {
00588 throw std::logic_error("no UTF8/16/32");
00589 }
00590 }
00591
00601 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00602 bool
00603 SqlRegExp(
00604 char const * const matchValue,
00605 int matchValueLenBytes,
00606 int patternLenBytes,
00607 const boost::regex& exp)
00608 {
00609 if (CodeUnitBytes == MaxCodeUnitsPerCodePoint) {
00610 if (CodeUnitBytes == 1) {
00611
00612
00613 if (patternLenBytes == 0) {
00614 if (matchValueLenBytes == 0) {
00615
00616
00617
00618 return true;
00619 } else {
00620
00621
00622 return false;
00623 }
00624 }
00625
00626 bool result;
00627 try {
00628 result = boost::regex_match(
00629 matchValue,
00630 matchValue + matchValueLenBytes,
00631 exp);
00632 } catch (...) {
00633
00634
00635 throw std::logic_error("boost::regex error in SqlLike");
00636 }
00637 return result;
00638
00639 } else if (CodeUnitBytes == 2) {
00640
00641 throw std::logic_error("no UCS2");
00642 } else {
00643 throw std::logic_error("no such encoding");
00644 }
00645 } else {
00646 throw std::logic_error("no UTF8/16/32");
00647 }
00648 }
00649
00650
00651
00652 FENNEL_END_NAMESPACE
00653
00654 #endif
00655
00656