00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 #ifndef Fennel_SqlRegExp_Included
00028 #define Fennel_SqlRegExp_Included
00029 
00030 #include <string>
00031 #include <boost/regex.hpp>
00032 
00033 #ifdef HAVE_ICU
00034 #include <unicode/ustring.h>
00035 #endif
00036 
00037 FENNEL_BEGIN_NAMESPACE
00038 
00039 #if !(defined LITTLEENDIAN || defined BIGENDIAN)
00040 #error "endian not defined"
00041 #endif
00042 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
/**
 * Rewrites a SQL LIKE pattern into regular-expression text.
 *
 * Only the fixed-width, one-byte-per-character instantiation
 * (CodeUnitBytes == MaxCodeUnitsPerCodePoint == 1) is implemented; every
 * other instantiation throws std::logic_error.
 *
 * @param pattern          LIKE pattern bytes (need not be NUL terminated).
 * @param patternLenBytes  Length of pattern in bytes. When 0, expPat is set
 *                         to the placeholder text "UNUSED" and nothing else
 *                         (including the escape arguments) is examined.
 * @param escape           Escape character, or a null pointer for none.
 * @param escapeLenBytes   Length of escape in bytes; must be 1, or 0
 *                         together with a null escape pointer.
 * @param expPat           Receives the rewritten pattern.
 *
 * @throws char const*  "22019" when the escape argument is neither a single
 *                      byte nor (null pointer, length 0); "22025" when the
 *                      escape character in the pattern is not followed by
 *                      '_', '%' or the escape character itself. (These look
 *                      like SQLSTATE codes: invalid escape character and
 *                      invalid escape sequence.)
 * @throws std::logic_error for unsupported encodings.
 */
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlLikePrep(
    char const * const pattern,
    int patternLenBytes,
    char const * const escape,
    int escapeLenBytes,
    std::string& expPat)
{
    // Reject every encoding other than single-byte fixed width up front.
    if (CodeUnitBytes != MaxCodeUnitsPerCodePoint) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    // An empty pattern produces a placeholder only.
    if (patternLenBytes == 0) {
        expPat.assign("UNUSED");
        return;
    }

    // Characters the scan below must stop at: the two LIKE wildcards plus
    // the regular-expression metacharacters (and, later, the escape char).
    std::string stopChars("_%.|*?+(){}[]^$\\");

    // True when the escape character is itself a regex metacharacter, so a
    // doubled escape has to be emitted as backslash + character.
    bool escapeNeedsBackslash = false;
    char escapeChar = 0;    // 0 is never in stopChars unless given explicitly

    if (escapeLenBytes == 1) {
        escapeChar = *escape;
        bool const isLikeWildcard = (escapeChar == '_' || escapeChar == '%');
        escapeNeedsBackslash =
            !isLikeWildcard &&
            stopChars.find(escapeChar) != std::string::npos;
        stopChars.push_back(escapeChar);
    } else if (escape || escapeLenBytes) {
        // Anything but (null pointer, zero length) is an invalid escape.
        throw "22019";
    }

    expPat.assign(pattern, patternLenBytes);

    // Walk the pattern from left to right, rewriting in place. 'at' always
    // ends an iteration just past the text produced for the current char.
    for (size_t at = expPat.find_first_of(stopChars, 0);
         at != std::string::npos;
         at = expPat.find_first_of(stopChars, at))
    {
        char const current = expPat[at];

        if (current == escapeChar) {
            if (at + 1 >= expPat.size()) {
                // Escape character at the very end of the pattern.
                throw "22025";
            }
            char const following = expPat[at + 1];
            bool const validSequence =
                following == '_' ||
                following == '%' ||
                following == escapeChar;
            if (!validSequence) {
                throw "22025";
            }
            if (escapeNeedsBackslash && following == escapeChar) {
                // Doubled metacharacter escape: emit "\<char>".
                expPat[at] = '\\';
                at += 2;
            } else {
                // Drop the escape and keep the following char as a literal.
                expPat.erase(at, 1);
                ++at;
            }
            continue;
        }

        if (current == '_') {
            // Any single character.
            expPat[at] = '.';
            ++at;
        } else if (current == '%') {
            // Any run of characters.
            expPat.replace(at, 1, ".*");
            at += 2;
        } else {
            // Regex metacharacter (backslash included): quote it.
            expPat.insert(at, 1, '\\');
            at += 2;
        }
    }
}
00185 
/**
 * Validates the escape argument of a SIMILAR pattern and records the escape
 * character.
 *
 * Does nothing unless instantiated for the fixed-width, one-byte encoding
 * (CodeUnitBytes == MaxCodeUnitsPerCodePoint == 1).
 *
 * @param escape          Escape character, or a null pointer for none.
 * @param escapeLenBytes  Length of escape in bytes; must be 1, or 0 together
 *                        with a null escape pointer.
 * @param escapeChar      Receives the escape character, or 0 when there is
 *                        none.
 * @param expPat          The pattern text, inspected but not modified.
 * @param sqlSpecial      Set of characters the rewrite pass stops at; the
 *                        escape character is appended to it.
 *
 * @throws char const*  "22019" for an escape that is neither one byte nor
 *                      (null pointer, length 0); "2200C" when the escape
 *                      character is one of the pattern's special characters
 *                      and appears in the pattern without being followed by
 *                      a special character; "2200B" when the escape
 *                      character is ':' and the pattern contains "[:" or
 *                      ":]". Note escapeChar and sqlSpecial have already
 *                      been updated when "2200C"/"2200B" are thrown.
 */
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlSimilarPrepEscapeProcessing(
    char const * const escape,
    int escapeLenBytes,
    char& escapeChar,
    std::string const & expPat,
    std::string& sqlSpecial)
{
    if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
        CodeUnitBytes == 1) {
        if (escapeLenBytes == 1) {
            escapeChar = *escape;
            sqlSpecial.append(1, escapeChar);

            // Special characters of the SIMILAR pattern grammar. Held in a
            // std::string and searched with find() rather than strchr():
            // strchr() also matches the terminating NUL, which made a NUL
            // byte look like a special character.
            std::string const generalRule3b("[]()|^-+*_%?{}");

            if (generalRule3b.find(escapeChar) != std::string::npos) {
                // The escape character doubles as a special character, so
                // every occurrence in the pattern must start a valid
                // two-character escape sequence.
                size_t pos = 0;
                while ((pos = expPat.find(escapeChar, pos)) !=
                       std::string::npos) {
                    if (pos + 1 >= expPat.size() ||
                        generalRule3b.find(expPat[pos + 1]) ==
                        std::string::npos)
                    {
                        throw "2200C";
                    }
                    pos += 2; // skip the escape and the escaped character
                }
            }
            if (escapeChar == ':' &&
                (expPat.find("[:") != std::string::npos ||
                 expPat.find(":]") != std::string::npos)) {
                // ':' as escape clashes with "[:name:]" identifiers.
                throw "2200B";
            }
        } else if (!escape && !escapeLenBytes) {
            // No escape supplied: 0 is used as "matches nothing".
            escapeChar = 0;
        } else {
            // Escape present but not exactly one byte long.
            throw "22019";
        }
    }
}
00250 
00251 
00252 
00253 
00254 
00255 
/**
 * Handles the text that follows a '[' in a SIMILAR pattern.
 *
 * Two cases are distinguished at position pos (the character right after
 * the '['):
 *  - The text starts with "[:" or "^[:": it must be one of the known
 *    "[:NAME:]" identifiers, which is replaced in expPat by its regular
 *    expression equivalent, and pos is advanced past the replacement (and
 *    past the '^', if present).
 *  - Anything else is treated as a plain character enumeration: it is only
 *    validated up to the closing ']'; expPat and pos are left untouched.
 *
 * Does nothing unless instantiated for the fixed-width, one-byte encoding
 * (CodeUnitBytes == MaxCodeUnitsPerCodePoint == 1).
 *
 * @param expPat                     Pattern being rewritten in place.
 * @param pos                        In: index just past the '['.
 *                                   Out: see above.
 * @param SqlSimilarPrepSyntaxRule6  Not used by this function.
 * @param escapeChar                 Escape character, or 0 for none.
 *
 * @throws char const*  "2201B" when an unescaped special character appears
 *                      inside a plain enumeration, or when a "[:...:]"
 *                      identifier is not recognized.
 */
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
void
SqlSimilarPrepRewriteCharEnumeration(
    std::string& expPat,
    size_t& pos,
    char const * const SqlSimilarPrepSyntaxRule6,
    char escapeChar)
{
    if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
        CodeUnitBytes == 1) {
        if (!expPat.compare(pos, 3, "^[:")) {
            // Negated identifier: step over the '^' and look it up below.
            pos++;
        } else if (!expPat.compare(pos, 2, "[:")) {
            // Identifier: looked up below.
        } else {
            // Plain enumeration. '^' and '-' are legal here, so they are
            // left out of the set of characters that need escaping.
            std::string syntaxRule6ForCharEnum("[]()|+*_%?{}");
            // append(count, ch): one copy of the escape character. The
            // arguments used to be swapped, which appended escapeChar copies
            // of '\x01' and left the escape character out of the set.
            // 0 means "no escape character" and is deliberately not added.
            if (escapeChar != 0) {
                syntaxRule6ForCharEnum.append(1, escapeChar);
            }

            size_t pos2 = pos;
            while ((pos2 = expPat.find_first_of(syntaxRule6ForCharEnum, pos2))
                   != std::string::npos) {
                if (expPat[pos2] == escapeChar) {
                    // Skip the escape and the character it escapes.
                    pos2 += 2;
                } else if (expPat[pos2] == ']') {
                    // End of the enumeration.
                    break;
                } else {
                    // Unescaped special character inside the enumeration.
                    throw "2201B";
                }
            }
            return;
        }

        // SQL identifier -> replacement text. Upper- and lower-case
        // spellings are listed separately; an empty name ends the table.
        char const * const regCharSetIdent[][2] = {
            { "[:ALPHA:]", "[:alpha:]" },
            { "[:alpha:]", "[:alpha:]" },
            { "[:UPPER:]", "[:upper:]" },
            { "[:upper:]", "[:upper:]" },
            { "[:LOWER:]", "[:lower:]" },
            { "[:lower:]", "[:lower:]" },
            { "[:DIGIT:]", "[:digit:]" },
            { "[:digit:]", "[:digit:]" },
            { "[:SPACE:]", " " },
            { "[:space:]", " " },
            { "[:WHITESPACE:]", "\x20\xa0\x09\x0a\x0b\x0c\x0d\x85" },
            { "[:whitespace:]", "\x20\xa0\x09\x0a\x0b\x0c\x0d\x85" },
            { "[:ALNUM:]", "[:alnum:]" },
            { "[:alnum:]", "[:alnum:]" },
            { "", "" },
        };
        for (size_t i = 0; *regCharSetIdent[i][0]; i++) {
            std::string const sqlName(regCharSetIdent[i][0]);
            if (!expPat.compare(pos, sqlName.size(), sqlName)) {
                std::string const regexName(regCharSetIdent[i][1]);
                expPat.replace(pos, sqlName.size(), regexName);
                pos += regexName.size();
                return;
            }
        }
        // "[:" that does not start a known identifier.
        throw "2201B";
    }
}
00372 
00373 
00374 
00375 
00376 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00377 void
00378 SqlSimilarPrepReWrite(
00379     char escapeChar,
00380     std::string& expPat,
00381     std::string& sqlSpecial)
00382 {
00383     if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00384         CodeUnitBytes == 1) {
00385         
00386 
00387         
00388         
00389         
00390         
00391         
00392         
00393         
00394         char const * const SqlSimilarPrepSyntaxRule6 = "[]()|^-+*_%?{}";
00395 
00396         char const * const BoostRegExEscapeChar = "\\";
00397 
00398         
00399         
00400         
00401         
00402 
00403         size_t pos = 0;
00404         bool characterEnumeration = false; 
00405         while ((pos = expPat.find_first_of(sqlSpecial, pos)) !=
00406                std::string::npos)
00407         {
00408             if (expPat[pos] == escapeChar) {
00409                 if (pos + 1 >= expPat.size()) {
00410                     
00411                     
00412                     
00413                     throw "2201B";
00414                 }
00415                 if (strchr(SqlSimilarPrepSyntaxRule6, expPat[pos + 1])) {
00416                     
00417                     
00418                     
00419                     expPat.replace(pos, 1, BoostRegExEscapeChar);
00420                     
00421                     pos += 2;
00422                 } else if (expPat[pos + 1] == escapeChar) {
00423                     
00424                     
00425                     
00426                     expPat.erase(pos, 1);
00427                     
00428                     pos++;
00429                 } else {
00430                     
00431                     
00432                     
00433                     
00434                     
00435                     
00436                     
00437                     
00438                     
00439                     
00440                     
00441                     throw "2201B";
00442                 }
00443             } else {
00444                 switch (expPat[pos]) {
00445                 case '[':
00446                     
00447                     
00448                     
00449                     
00450                     characterEnumeration = true;
00451                     pos++;
00452                     SqlSimilarPrepRewriteCharEnumeration
00453                         <CodeUnitBytes, MaxCodeUnitsPerCodePoint>
00454                         (expPat, pos, SqlSimilarPrepSyntaxRule6, escapeChar);
00455                     break;
00456                 case ']':
00457                     if (!characterEnumeration) {
00458                         
00459                         
00460                         
00461                         throw "2201B";
00462                     }
00463                     characterEnumeration = false;
00464                     pos++;
00465                     break;
00466                 case '_':   
00467                     expPat.replace(pos, 1, ".");
00468                     pos++;
00469                     break;
00470                 case '%':   
00471                     expPat.replace(pos, 1, ".*");
00472                     pos += 2;
00473                     break;
00474                 case '\\':
00475                     
00476                     
00477                     
00478                     
00479                     
00480                     
00481                     
00482                     
00483                     
00484                     
00485                     
00486                     
00487                     
00488                     
00489                     expPat.replace(pos, 1, "\\\\");
00490                     pos += 2;
00491                     break;
00492                 case '.':
00493                     
00494                     expPat.replace(pos, 1, "\\.");
00495                     pos += 2;
00496                     break;
00497                 case '$':
00498                     
00499                     expPat.replace(pos, 1, "\\$");
00500                     pos += 2;
00501                     break;
00502                 default:
00503                     throw std::logic_error("SqlSimilarPrep:escapeSwitch");
00504                 }
00505             }
00506         } 
00507 
00508         if (characterEnumeration) {
00509             
00510             
00511             
00512             throw "2201B";
00513         }
00514     }
00515 }
00516 
00536 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00537 void
00538 SqlSimilarPrep(
00539     char const * const pattern,
00540     int patternLenBytes,
00541     char const * const escape,  
00542     int escapeLenBytes,
00543     std::string& expPat)
00544 {
00545     if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00546         CodeUnitBytes == 1) {
00547         
00548 
00549         if (patternLenBytes == 0) {
00550             
00551             
00552             
00553             
00554             expPat.assign("UNUSED");
00555             return;
00556         }
00557 
00558         expPat.assign(pattern, patternLenBytes);
00559 
00560         
00561         
00562         std::string sqlSpecial("\\.$_%[]");
00563         char escapeChar;
00564 
00565         SqlSimilarPrepEscapeProcessing<CodeUnitBytes, MaxCodeUnitsPerCodePoint>(
00566                 escape,
00567                 escapeLenBytes,
00568                 escapeChar,
00569                 expPat,
00570                 sqlSpecial);
00571 
00572         SqlSimilarPrepReWrite<CodeUnitBytes, MaxCodeUnitsPerCodePoint>(
00573             escapeChar,
00574             expPat,
00575             sqlSpecial);
00576 
00577     } else if (CodeUnitBytes == MaxCodeUnitsPerCodePoint &&
00578                CodeUnitBytes == 2) {
00579         
00580         
00581         
00582         
00583         
00584         throw std::logic_error("no UCS2");
00585     } else if (CodeUnitBytes == MaxCodeUnitsPerCodePoint) {
00586         throw std::logic_error("no such encoding");
00587     } else {
00588         throw std::logic_error("no UTF8/16/32");
00589     }
00590 }
00591 
00601 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00602 bool
00603 SqlRegExp(
00604     char const * const matchValue,
00605     int matchValueLenBytes,
00606     int patternLenBytes,
00607     const boost::regex& exp)
00608 {
00609     if (CodeUnitBytes == MaxCodeUnitsPerCodePoint) {
00610         if (CodeUnitBytes == 1) {
00611             
00612 
00613             if (patternLenBytes == 0) {
00614                 if (matchValueLenBytes == 0) {
00615                     
00616                     
00617                     
00618                     return true;
00619                 } else {
00620                     
00621                     
00622                     return false;
00623                 }
00624             }
00625 
00626             bool result;
00627             try {
00628                 result = boost::regex_match(
00629                     matchValue,
00630                     matchValue + matchValueLenBytes,
00631                     exp);
00632             } catch (...) {
00633                 
00634                 
00635                 throw std::logic_error("boost::regex error in SqlLike");
00636             }
00637             return result;
00638 
00639         } else if (CodeUnitBytes == 2) {
00640             
00641             throw std::logic_error("no UCS2");
00642         } else {
00643             throw std::logic_error("no such encoding");
00644         }
00645     } else {
00646         throw std::logic_error("no UTF8/16/32");
00647     }
00648 }
00649 
00650 
00651 
00652 FENNEL_END_NAMESPACE
00653 
00654 #endif
00655 
00656