SqlRegExpTest.cpp

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/calctest/SqlRegExpTest.cpp#2 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 The Eigenbase Project
00005 // Copyright (C) 2004-2009 SQLstream, Inc.
00006 // Copyright (C) 2009-2009 LucidEra, Inc.
00007 //
00008 // This program is free software; you can redistribute it and/or modify it
00009 // under the terms of the GNU General Public License as published by the Free
00010 // Software Foundation; either version 2 of the License, or (at your option)
00011 // any later version approved by The Eigenbase Project.
00012 //
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public License for more details.
00017 //
00018 // You should have received a copy of the GNU General Public License
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 */
00022 
00023 #include "fennel/common/CommonPreamble.h"
00024 #include "fennel/test/TestBase.h"
00025 #include "fennel/calculator/SqlRegExp.h"
00026 #include "fennel/common/TraceSource.h"
00027 #include "fennel/calctest/SqlStringBuffer.h"
00028 
00029 #include <boost/test/test_tools.hpp>
00030 #include <boost/scoped_array.hpp>
00031 #include <boost/regex.hpp>
00032 
00033 #include <string>
00034 #include <limits>
00035 #include <iostream>
00036 
00037 #ifdef HAVE_ICU
00038 #include <unicode/unistr.h>
00039 #include <unicode/uloc.h>
00040 #endif
00041 
00042 using namespace fennel;
00043 using namespace std;
00044 
00045 class SqlRegExpTest : virtual public TestBase, public TraceSource
00046 {
00047     void testSqlRegExpLikeAsciiTrue();
00048     void testSqlRegExpLikeAsciiFalse();
00049     void testSqlRegExpLikeAsciiEscapeTrue();
00050     void testSqlRegExpLikeAsciiEscapeFalse();
00051     void testSqlRegExpLikeAsciiException();
00052 
00053     void testSqlRegExpSimilarAscii();
00054     void testSqlRegExpSimilarAsciiEscape();
00055     void testSqlRegExpSimilarAsciiException();
00056 
00057 public:
00058     explicit SqlRegExpTest()
00059         : TraceSource(shared_from_this(),"SqlRegExpTest")
00060     {
00061         srand(time(NULL));
00062 
00063         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpLikeAsciiTrue);
00064         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpLikeAsciiFalse);
00065         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpLikeAsciiEscapeTrue);
00066         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpLikeAsciiEscapeFalse);
00067         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpLikeAsciiException);
00068 
00069         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpSimilarAscii);
00070         FENNEL_UNIT_TEST_CASE(SqlRegExpTest, testSqlRegExpSimilarAsciiEscape);
00071         FENNEL_UNIT_TEST_CASE(
00072             SqlRegExpTest, testSqlRegExpSimilarAsciiException);
00073     }
00074 
00075     virtual ~SqlRegExpTest()
00076     {
00077     }
00078 };
00079 
00080 
00081 void
00082 SqlRegExpTest::testSqlRegExpLikeAsciiTrue()
00083 {
00084     bool result = false;
00085     int i;
00086 
00087     const char* test[][2] = {
00088         // pattern, matchValue
00089 
00090         // SQL99 Part 2 Section 8.5 General Rule 3.d.i
00091         { "",    "" },
00092 
00093         { "_",   "a" },
00094         { "a",   "a" },
00095         { "abc", "abc" },
00096         { "_bc", "abc" },
00097         { "a_c", "abc" },
00098         { "ab_", "abc" },
00099         { "a__", "abc" },
00100         { "_b_", "abc" },
00101         { "__c", "abc" },
00102 
00103         { "%",   "" },
00104         { "%",   "a" },
00105         { "%",   "abc" },
00106         { "%b",  "ab" },
00107         { "a%",  "ab" },
00108         { "ab%", "abc" },
00109         { "a%c", "abc" },
00110         { "%bc", "abc" },
00111         { "a%",  "abc" },
00112         { "%b%", "abc" },
00113         { "%c",  "abc" },
00114         { "%abc","abc" },
00115         { "abc%","abc" },
00116 
00117         // ensure that regex special chars are OK
00118         // and escaped properly
00119         { ".|*?+(){}[]^$\\",  ".|*?+(){}[]^$\\" },
00120         { "%.|*?+(){}[]^$\\", ".|*?+(){}[]^$\\" },
00121         { ".|*?+(){}[]^$\\%", ".|*?+(){}[]^$\\" },
00122         { "%){}[]^$\\",       ".|*?+(){}[]^$\\" },
00123         { ".|*?+()%",         ".|*?+(){}[]^$" },
00124         { "%$",               ".|*?+(){}[]^$" },
00125         { ".|*%",             ".|*?+(){}[]^$" },
00126 
00127         { "\\",      "\\" },
00128         { "a\\c",    "a\\c" },
00129         { "a\\%de",  "a\\cde" },
00130         { "a\\_de",  "a\\cde" },
00131         { "a\\.de",  "a\\.de" },
00132 
00133         { "X", "X" }  // end sentinel
00134     };
00135     string expPat;
00136     for (i = 0; *test[i][0] != 'X'; i++) {
00137         BOOST_MESSAGE("      true " << i << " " <<test[i][0]);
00138         try {
00139             SqlLikePrep<1, 1>(
00140                 test[i][0],
00141                 strlen(test[i][0]),
00142                 0, 0,    // no escape
00143                 expPat);
00144             boost::regex exp(expPat);
00145             result = SqlRegExp<1, 1>(
00146                 test[i][1],
00147                 strlen(test[i][1]),
00148                 strlen(test[i][0]),
00149                 exp);
00150         } catch (char const * const ptr) {
00151             // unexpected exception
00152             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
00153             BOOST_CHECK(0);
00154         } catch (boost::bad_expression badexp) {
00155             // regex format problem
00156             BOOST_MESSAGE("unexpected regex exception: "
00157                           <<badexp.what());
00158             BOOST_CHECK(0);
00159         } catch (...) {
00160             // unexpected exception
00161             BOOST_MESSAGE("unexpected unknown exception");
00162             BOOST_CHECK(0);
00163         }
00164 
00165         if (!result) {
00166             BOOST_MESSAGE("|" << test[i][1] <<
00167                           "| |" << test[i][0] << "|");
00168         }
00169 
00170         BOOST_CHECK(result);
00171     }
00172 }
00173 
00174 void
00175 SqlRegExpTest::testSqlRegExpLikeAsciiFalse()
00176 {
00177     bool result = false;
00178     int i;
00179     const char* test[][2] = {
00180         // pattern,    matchValue
00181         { "",    "a" },
00182         { "_",    "" },
00183         { "a",    "" },
00184         { "a",    "b" },
00185         { "b",    "ab" },
00186         { "a",    "ab" },
00187         { "__",    "a" },
00188         { "abc",    "Abc" },
00189         { "abc",    "aBc" },
00190         { "abc",    "abC" },
00191         { "_bc",    "aBc" },
00192         { "_bc",    "abC" },
00193         { "a_c",    "Abc" },
00194         { "ab_",    "aBc" },
00195         { "a__",    "Abc" },
00196         { "_b_",    "aBc" },
00197         { "__c",    "abC" },
00198 
00199         { "%b",     "a" },
00200         { "a%",     "b" },
00201         { "ab%",    "ac" },
00202         { "a%c",    "ab" },
00203         { "%bc",    "ab" },
00204         { "%b%",    "aBc" },
00205         { "%c",     "ab" },
00206         { "%abc","ac" },
00207         { "%abc","bc" },
00208         { "%abc","ab" },
00209         { "abc%","ab" },
00210         { "abc%","ac" },
00211         { "abc%","bc" },
00212 
00213         { "\\",      "a" },
00214         { "a",       "\\" },
00215         { "a\\c",    "a\\" },
00216         { "a\\c",    "\\c" },
00217         { "\\c",    "a\\" },
00218         { "a\\",    "\\c" },
00219 
00220         { "X",    "X" }  // end sentinel
00221     };
00222     string expPat;
00223     for (i = 0; *test[i][0] != 'X'; i++) {
00224         BOOST_MESSAGE("      false " << i << " " <<test[i][0]);
00225         try {
00226             SqlLikePrep<1,1>(
00227                 test[i][0],
00228                 strlen(test[i][0]),
00229                 0, 0,   // no escape
00230                 expPat);
00231             boost::regex exp(expPat);
00232             result = SqlRegExp<1,1>(
00233                 test[i][1],
00234                 strlen(test[i][1]),
00235                 strlen(test[i][0]),
00236                 exp);
00237         } catch (char const * const ptr) {
00238             // unexpected exception
00239             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
00240             BOOST_CHECK(0);
00241         } catch (boost::bad_expression badexp) {
00242             // regex format problem
00243             BOOST_MESSAGE("unexpected regex exception: "
00244                           <<badexp.what());
00245             BOOST_CHECK(0);
00246         } catch (...) {
00247             // unexpected exception
00248             BOOST_MESSAGE("unexpected unknown exception");
00249             BOOST_CHECK(0);
00250         }
00251 
00252 
00253         if (result) {
00254             BOOST_MESSAGE("|" << test[i][1] <<
00255                           "| |" << test[i][0] << "|");
00256         }
00257 
00258         BOOST_CHECK(!result);
00259     }
00260 }
00261 
00262 
00263 void
00264 SqlRegExpTest::testSqlRegExpLikeAsciiEscapeTrue()
00265 {
00266     bool result = false;
00267     int i;
00268 
00269     const char* test[][3] = {
00270         // pattern, matchValue, escape
00271         // define new escape
00272         { "_",      "a",  "#" },
00273         { "#_",    "_",   "#" },
00274         { "##",    "#",   "#" },
00275         { "#_bc",  "_bc", "#" },
00276         { "a#_c",  "a_c", "#" },
00277         { "ab#_",  "ab_", "#" },
00278 
00279         { "#%",    "%",   "#" },
00280         { "#%bc",  "%bc", "#" },
00281         { "a#%c",  "a%c", "#" },
00282         { "ab#%",  "ab%", "#" },
00283         { "%",     "a",   "#" },
00284         { "#%",    "%",   "#" },
00285 
00286         // define new escape that is special regexp char
00287         { "_",     "a",   "|" },
00288         { "|_",    "_",   "|" },
00289         { "||",    "|",   "|" },
00290         { "|_bc",  "_bc", "|" },
00291         { "a|_c",  "a_c", "|" },
00292         { "ab|_",  "ab_", "|" },
00293 
00294         { "|%",    "%",   "|" },
00295         { "|%bc",  "%bc", "|" },
00296         { "a|%c",  "a%c", "|" },
00297         { "ab|%",  "ab%", "|" },
00298         { "%",     "a",   "|" },
00299         { "|%",    "%",   "|" },
00300 
00301         // define new escape that is special regexp char
00302         { "_",     "a",   ")" },
00303         { ")_",    "_",   ")" },
00304         { "))",    ")",   ")" },
00305         { ")_bc",  "_bc", ")" },
00306         { "a)_c",  "a_c", ")" },
00307         { "ab)_",  "ab_", ")" },
00308 
00309         { ")%",    "%",   ")" },
00310         { ")%bc",  "%bc", ")" },
00311         { "a)%c",  "a%c", ")" },
00312         { "ab)%",  "ab%", ")" },
00313         { "%",     "a",   ")" },
00314         { ")%",    "%",   ")" },
00315 
00316         { "X",     "X",   "X" }  // end sentinel
00317     };
00318     string expPat;
00319     for (i = 0; *test[i][0] != 'X'; i++) {
00320         BOOST_MESSAGE("      escape true " << i << " " <<test[i][0]);
00321         try {
00322             SqlLikePrep<1, 1>(
00323                 test[i][0],
00324                 strlen(test[i][0]),
00325                 test[i][2],
00326                 strlen(test[i][2]),
00327                 expPat);
00328             boost::regex exp(expPat);
00329             result = SqlRegExp<1, 1>(
00330                 test[i][1],
00331                 strlen(test[i][1]),
00332                 strlen(test[i][0]),
00333                 exp);
00334         } catch (char const * const ptr) {
00335             // unexpected exception
00336             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
00337             BOOST_CHECK(0);
00338         } catch (boost::bad_expression badexp) {
00339             // regex format problem
00340             BOOST_MESSAGE("unexpected regex exception: "
00341                           <<badexp.what());
00342             BOOST_CHECK(0);
00343         } catch (...) {
00344             // unexpected exception
00345             BOOST_MESSAGE("unexpected unknown exception");
00346             BOOST_CHECK(0);
00347         }
00348 
00349         if (!result) {
00350             BOOST_MESSAGE("|" << test[i][1] <<
00351                           "| |" << test[i][0] << "|");
00352         }
00353 
00354         BOOST_CHECK(result);
00355     }
00356 }
00357 
00358 void
00359 SqlRegExpTest::testSqlRegExpLikeAsciiEscapeFalse()
00360 {
00361     bool result = false;
00362     int i;
00363 
00364     const char* test[][3] = {
00365         // pattern, matchValue, escape
00366 
00367         { "_",    "ab",  "#" },
00368         { "#_",   "_a",  "#" },
00369         { "#_",   "a_",  "#" },
00370         { "#_",   "a",   "#" },
00371         { "#_",   "__",  "#" },
00372         { "#_",   "a",   "#" },
00373         { "##",   "a",   "#" },
00374         { "#_#_", "a",   "#" },
00375         { "#_#_", "_",   "#" },
00376         { "#_#_", "_a",  "#" },
00377         { "#_#_", "a_",  "#" },
00378         { "#_bc", "abc", "#" },
00379         { "a#_c", "abc", "#" },
00380         { "ab#_", "abc", "#" },
00381 
00382         { "#%",   "a",   "#" },
00383         { "#%",   "ab",  "#" },
00384         { "#%",   "a",   "#" },
00385         { "#%bc", "abc", "#" },
00386         { "a#%c", "abc", "#" },
00387         { "ab#%", "abc", "#" },
00388 
00389         // define escape that is special regexp char
00390         { "_",    "ab",  "|" },
00391         { "|_",   "_a",  "|" },
00392         { "|_",   "a_",  "|" },
00393         { "|_",   "a",   "|" },
00394         { "||",   "a",   "|" },
00395         { "|%",   "a",   "|" },
00396         { "|%",   "ab",  "|" },
00397 
00398         // define escape that is special regexp char
00399         { "_",    "ab",  ")" },
00400         { ")_",   "_a",  ")" },
00401         { ")_",   "a_",  ")" },
00402         { ")_",   "a",   ")" },
00403         { "))",   "a",   ")" },
00404         { ")%",   "a",   ")" },
00405         { ")%",   "ab",  ")" },
00406 
00407 
00408         { "X",    "X",   "X" }  // end sentinel
00409     };
00410     string expPat;
00411     for (i = 0; *test[i][0] != 'X'; i++) {
00412         BOOST_MESSAGE("      escapefalse " << i << " " <<test[i][0]);
00413         try {
00414             SqlLikePrep<1, 1>(
00415                 test[i][0],
00416                 strlen(test[i][0]),
00417                 test[i][2],
00418                 strlen(test[i][2]),
00419                 expPat);
00420             boost::regex exp(expPat);
00421             result = SqlRegExp<1, 1>(
00422                 test[i][1],
00423                 strlen(test[i][1]),
00424                 strlen(test[i][0]),
00425                 exp);
00426         } catch (char const * const ptr) {
00427             // unexpected exception
00428             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
00429             BOOST_CHECK(0);
00430         } catch (boost::bad_expression badexp) {
00431             // regex format problem
00432             BOOST_MESSAGE("unexpected regex exception: "
00433                           <<badexp.what());
00434             BOOST_CHECK(0);
00435         } catch (...) {
00436             // unexpected exception
00437             BOOST_MESSAGE("unexpected unknown exception");
00438             BOOST_CHECK(0);
00439         }
00440 
00441         if (result) {
00442             BOOST_MESSAGE("|" << test[i][1] <<
00443                           "| |" << test[i][0] << "|");
00444         }
00445 
00446         BOOST_CHECK(!result);
00447     }
00448 }
00449 
00450 void
00451 SqlRegExpTest::testSqlRegExpLikeAsciiException()
00452 {
00453     bool caught = false;
00454     bool result = false;
00455     int i;
00456 
00457     const char* test[][4] = {
00458         // pattern, matchValue, escape, exception
00459         { "=",       "a",       "=",       "22025" },
00460         { "=a",       "a",       "=",      "22025" },
00461 
00462         { "a",       "a",       "ab",      "22019" },
00463         { "a",       "a",       "\\\\",    "22019" },
00464 
00465         { "X",       "X",       "X",       "X" }  // end sentinel
00466     };
00467     string expPat;
00468     for (i = 0; *test[i][0] != 'X'; i++) {
00469         BOOST_MESSAGE("      exception " << i << " " <<test[i][0]);
00470         caught = false;
00471         try {
00472             SqlLikePrep<1,1>(
00473                 test[i][0],
00474                 strlen(test[i][0]),
00475                 test[i][2],
00476                 strlen(test[i][2]),
00477                 expPat);
00478             boost::regex exp(expPat);
00479             result = SqlRegExp<1,1>(
00480                 test[i][1],
00481                 strlen(test[i][1]),
00482                 strlen(test[i][0]),
00483                 exp);
00484         } catch (char const * const ex) {
00485             caught = true;
00486             BOOST_CHECK(!strcmp(ex,       test[i][3]));
00487         }
00488         if (!caught) {
00489             BOOST_CHECK(0);
00490         }
00491     }
00492 }
00493 
00494 void
00495 SqlRegExpTest::testSqlRegExpSimilarAscii()
00496 {
00497     bool result = false;
00498     int i;
00499 
00500     const char* test[][3] = {
00501         // pattern, matchValue, result
00502 
00503         // {2}
00504         // SQL2003 Part 2 Section 8.6 General Rule 6.a & 7.d
00505         { "a{2}",      "aa",     "t" },
00506         { "a{2}b",     "aab",    "t" },
00507         { "(bc){2}",   "bcbc",   "t" },
00508         { "(bc){2}d",  "bcbcd",  "t" },
00509         { "a(bc){2}",  "abcbc",  "t" },
00510         { "[bc]{2}",   "bb",     "t" },
00511         { "[bc]{2}",   "cc",     "t" },
00512         { "[bc]{2}",   "bc",     "t" },
00513         { "[bc]{2}",   "cb",     "t" },
00514 
00515         { "a{2}",      "",       "f" },
00516         { "a{2}",      "a",      "f" },
00517         { "a{2}",      "ac",     "f" },
00518         { "a{2}",      "aaa",    "f" },
00519         { "a{2}",      "ab",     "f" },
00520         { "a{2}b",     "",       "f" },
00521         { "a{2}b",     "b",      "f" },
00522         { "a{2}b",     "ab",     "f" },
00523         { "a{2}b",     "aa",     "f" },
00524         { "a{2}b",     "ab",     "f" },
00525         { "(bc){2}",   "",       "f" },
00526         { "(bc){2}",   "a",      "f" },
00527         { "(bc){2}",   "bc",     "f" },
00528         { "(bc){2}",   "bcbcb",  "f" },
00529         { "[bc]{2}",   "",       "f" },
00530         { "[bc]{2}",   "a",      "f" },
00531         { "[bc]{2}",   "b",      "f" },
00532         { "[bc]{2}",   "c",      "f" },
00533         { "[bc]{2}",   "ad",     "f" },
00534         { "[bc]{2}",   "",       "f" },
00535         { "[bc]{2}",   "bbc",    "f" },
00536 
00537         // {2,3}
00538         // SQL2003 Part 2 Section 8.6 General Rule 6.b & 7.d
00539         // <upper limit> w/ <high value>
00540         { "a{2,3}",    "aa",     "t" },
00541         { "a{2,3}",    "aaa",    "t" },
00542         { "a{2,3}b",   "aab",    "t" },
00543         { "a{2,3}b",   "aaab",   "t" },
00544         { "(bc){2,3}", "bcbc",   "t" },
00545         { "(bc){2,3}", "bcbcbc", "t" },
00546         { "[bc]{2,3}", "bb",     "t" },
00547         { "[bc]{2,3}", "bbb",    "t" },
00548         { "[bc]{2,3}", "cc",     "t" },
00549         { "[bc]{2,3}", "ccc",    "t" },
00550         { "[bc]{2,3}", "bcb",    "t" },
00551         { "[bc]{2,3}", "cbc",    "t" },
00552 
00553         { "a{2,3}",    "",       "f" },
00554         { "a{2,3}",    "a",      "f" },
00555         { "a{2,3}",    "aaaa",   "f" },
00556         { "a{2,3}",    "aab",    "f" },
00557         { "(bc){2,3}", "",       "f" },
00558         { "(bc){2,3}", "a",      "f" },
00559         { "(bc){2,3}", "bc",     "f" },
00560         { "(bc){2,3}", "cbcb",   "f" },
00561         { "[bc]{2,3}", "",       "f" },
00562         { "[bc]{2,3}", "a",      "f" },
00563         { "[bc]{2,3}", "b",      "f" },
00564         { "[bc]{2,3}", "c",      "f" },
00565         { "[bc]{2,3}", "bcbcbc", "f" },
00566         { "[bc]{2,3}", "bcbcbcb","f" },
00567         { "[bc]{2,3}", "",       "f" },
00568         { "[bc]{2,3}", "bbcc",   "f" },
00569 
00570         // {2,}
00571         // SQL2003 Part 2 Section 8.6 General Rule 6.c & 7.d
00572         // <upper limit> w/o <high value>
00573         // 98.6% sure that I'm interpreting this correctly. -JK 2004/6
00574         { "a{2,}",    "aa",     "t" },
00575         { "a{2,}",    "aaa",    "t" },
00576         { "a{2,}",    "aaaa",    "t" },
00577         { "a{2,}b",   "aab",    "t" },
00578         { "a{2,}b",   "aaab",   "t" },
00579         { "(bc){2,}", "bcbc",   "t" },
00580         { "(bc){2,}", "bcbcbc", "t" },
00581         { "[bc]{2,}", "bb",     "t" },
00582         { "[bc]{2,}", "bbb",    "t" },
00583         { "[bc]{2,}", "cc",     "t" },
00584         { "[bc]{2,}", "ccc",    "t" },
00585         { "[bc]{2,}", "bcb",    "t" },
00586         { "[bc]{2,}", "cbc",    "t" },
00587 
00588         { "a{2,}",    "",       "f" },
00589         { "a{2,}",    "a",      "f" },
00590         { "a{2,}",    "aab",    "f" },
00591         { "(bc){2,}", "",       "f" },
00592         { "(bc){2,}", "a",      "f" },
00593         { "(bc){2,}", "bc",     "f" },
00594         { "(bc){2,}", "cbcb",   "f" },
00595         { "(bc){2,}", "bcbcb",  "f" },
00596         { "[bc]{2,}", "",       "f" },
00597         { "[bc]{2,}", "a",      "f" },
00598         { "[bc]{2,}", "b",      "f" },
00599         { "[bc]{2,}", "c",      "f" },
00600         { "[bc]{2,}", "bcd",    "f" },
00601 
00602         // |
00603         // SQL2003 Part 2 Section 8.6 General Rule 7.a
00604         { "a|b",      "a",      "t" },
00605         { "a|b",      "b",      "t" },
00606         { "a|bc",     "a",      "t" },
00607         { "a|bc",     "bc",     "t" },
00608         { "(a|b)c",   "ac",     "t" },
00609         { "(a|b)c",   "bc",     "t" },
00610 
00611         { "a|b",      "c",      "f" },
00612         { "a|bc",     "c",      "f" },
00613         { "a|bc",     "ac",     "f" },
00614         { "(a|b)c",   "c",      "f" },
00615         { "(a|b)c",   "dc",     "f" },
00616 
00617         // *
00618         // SQL2003 Part 2 Section 8.6 General Rule 7.b
00619         { "a*b",      "b",      "t" },
00620         { "a*b",      "ab",     "t" },
00621         { "a*b",      "aab",    "t" },
00622         { "ab*",      "a",      "t" },
00623         { "ab*",      "ab",     "t" },
00624         { "ab*",      "abb",    "t" },
00625         { "a(bc)*",   "a",      "t" },
00626         { "a(bc)*",   "abc",    "t" },
00627         { "a(bc)*",   "abcbc",  "t" },
00628         { "a[bc]*",   "a",      "t" },
00629         { "a[bc]*",   "ab",     "t" },
00630         { "a[bc]*",   "ac",     "t" },
00631         { "a[bc]*",   "abb",    "t" },
00632         { "a[bc]*",   "abc",    "t" },
00633         { "a[bc]*",   "abc",    "t" },
00634         { "a[bc]*",   "acc",    "t" },
00635         { "a[bc]*",   "abbb",   "t" },
00636         { "a[bc]*",   "accc",   "t" },
00637 
00638         { "a*b",      "",       "f" },
00639         { "a*b",      "a",      "f" },
00640         { "a*b",      "ac",     "f" },
00641         { "ab*",      "b" ,     "f" },
00642         { "ab*",      "ac",     "f" },
00643         { "a(bc)*",   "",       "f" },
00644         { "a(bc)*",   "ad",     "f" },
00645         { "a(bc)*",   "abd",    "f" },
00646         { "a(bc)*",   "adb",    "f" },
00647         { "a[bc]*",   "",       "f" },
00648         { "a[bc]*",   "ad",     "f" },
00649         { "a[bc]*",   "abd",    "f" },
00650         { "a[bc]*",   "acd",    "f" },
00651 
00652         // +
00653         // SQL2003 Part 2 Section 8.6 General Rule 7.c
00654         { "a+b",      "ab",     "t" },
00655         { "a+b",      "aab",    "t" },
00656         { "ab+",      "ab",     "t" },
00657         { "ab+",      "abb",    "t" },
00658         { "a(bc)+",   "abc",    "t" },
00659         { "a(bc)+",   "abcbc",  "t" },
00660         { "a[bc]+",   "ab",     "t" },
00661         { "a[bc]+",   "abb",    "t" },
00662         { "a[bc]+",   "ac",     "t" },
00663         { "a[bc]+",   "acc",    "t" },
00664         { "a[bc]+",   "abc",    "t" },
00665         { "a[bc]+",   "abccb",  "t" },
00666 
00667         { "a+b",      "",       "f" },
00668         { "a+b",      "a",      "f" },
00669         { "a+b",      "b",      "f" },
00670         { "a+b",      "ac",     "f" },
00671         { "ab+",      "",       "f" },
00672         { "ab+",      "a",      "f" },
00673         { "ab+",      "b" ,     "f" },
00674         { "ab+",      "ac",     "f" },
00675         { "a(bc)+",   "",       "f" },
00676         { "a(bc)+",   "a",      "f" },
00677         { "a(bc)+",   "ad",     "f" },
00678         { "a(bc)+",   "abd",    "f" },
00679         { "a(bc)+",   "adb",    "f" },
00680         { "a[bc]+",   "",       "f" },
00681         { "a[bc]+",   "a",      "f" },
00682         { "a[bc]+",   "ad",     "f" },
00683         { "a[bc]+",   "abd",    "f" },
00684         { "a[bc]+",   "acd",    "f" },
00685 
00686         // General Rule 7.d is above with GR6
00687 
00688         // SQL2003 Part 2 Section 8.6 General Rule 7.e
00689         { "a",         "a",     "t" },
00690         { "a",         "",      "f" },
00691 
00692         // %
00693         // SQL2003 Part 2 Section 8.6 General Rule 7.f
00694         { "%",        "",       "t" },
00695         { "%",        "a",      "t" },
00696         { "%",        "abc",    "t" },
00697         { "%b",       "ab",     "t" },
00698         { "a%",       "ab",     "t" },
00699         { "ab%",      "abc",    "t" },
00700         { "a%c",      "abc",    "t" },
00701         { "%bc",      "abc",    "t" },
00702         { "a%",       "abc",    "t" },
00703         { "%b%",      "abc",    "t" },
00704         { "%c",       "abc",    "t" },
00705         { "%abc",     "abc",    "t" },
00706         { "abc%",     "abc",    "t" },
00707 
00708         { "%b",       "a",      "f" },
00709         { "a%",       "b",      "f" },
00710         { "ab%",      "ac",     "f" },
00711         { "a%c",      "ab",     "f" },
00712         { "%bc",      "ab",     "f" },
00713         { "%b%",      "aBc",    "f" },
00714         { "%c",       "ab",     "f" },
00715         { "%abc",     "ac",     "f" },
00716         { "%abc",     "bc",     "f" },
00717         { "%abc",     "ab",     "f" },
00718         { "abc%",     "ab",     "f" },
00719         { "abc%",     "ac",     "f" },
00720         { "abc%",     "bc",     "f" },
00721 
00722         // ?
00723         // SQL2003 Part 2 Section 8.6 General Rule 7.g
00724         { "a?b",      "b",      "t" },
00725         { "a?b",      "ab",     "t" },
00726         { "ab?",      "a",      "t" },
00727         { "ab?",      "ab",     "t" },
00728         { "a(bc)?",   "a",      "t" },
00729         { "a(bc)?",   "abc",    "t" },
00730         { "a[bc]?",   "a",      "t" },
00731         { "a[bc]?",   "ab",     "t" },
00732         { "a[bc]?",   "ac",     "t" },
00733 
00734         { "a?b",      "",       "f" },
00735         { "a?b",      "a",      "f" },
00736         { "a?b",      "ac",     "f" },
00737         { "a?b",      "aab",    "f" },
00738         { "ab?",      "abb",    "f" },
00739         { "ab?",      "b" ,     "f" },
00740         { "ab?",      "ac",     "f" },
00741         { "a(bc)?",   "",       "f" },
00742         { "a(bc)?",   "abcbc",  "f" },
00743         { "a(bc)?",   "ad",     "f" },
00744         { "a(bc)?",   "abd",    "f" },
00745         { "a(bc)?",   "adb",    "f" },
00746         { "a[bc]?",   "",       "f" },
00747         { "a[bc]?",   "ad",     "f" },
00748         { "a[bc]?",   "abd",    "f" },
00749         { "a[bc]?",   "acd",    "f" },
00750         { "a[bc]?",   "abb",    "f" },
00751         { "a[bc]?",   "abc",    "f" },
00752         { "a[bc]?",   "abc",    "f" },
00753         { "a[bc]?",   "acc",    "f" },
00754         { "a[bc]?",   "abbb",   "f" },
00755         { "a[bc]?",   "accc",   "f" },
00756 
00757         // SQL2003 Part 2 Section 8.6 General Rule 7.h
00758         // also mixed with other tests
00759         { "(a)",      "a",      "t" },
00760         { "(ab)",     "ab",     "t" },
00761         { "(a)(b)",   "ab",     "t" },
00762         { "a(b)(c)d", "abcd",   "t" },
00763         { "(a(b))",   "ab",     "t" },
00764 
00765         { "(a)",      "",       "f" },
00766         { "(a)",      "b",      "f" },
00767         { "(ab)",     "a",      "f" },
00768         { "(ab)",     "b",      "f" },
00769         { "(a)(b)",   "a",      "f" },
00770         { "(a)(b)",   "b",      "f" },
00771         { "(a)(b)",   "abc",    "f" },
00772         { "a(b)(c)d", "abc",    "f" },
00773         { "a(b)(c)d", "bcd",    "f" },
00774         { "(a(b))",   "abc",    "f" },
00775 
00776         // _
00777         // SQL2003 Part 2 Section 8.6 General Rule 7.i
00778         { "_",        "a",      "t" },
00779         { "a",        "a",      "t" },
00780         { "abc",      "abc",    "t" },
00781         { "_bc",      "abc",    "t" },
00782         { "a_c",      "abc",    "t" },
00783         { "ab_",      "abc",    "t" },
00784         { "a__",      "abc",    "t" },
00785         { "_b_",      "abc",    "t" },
00786         { "__c",      "abc",    "t" },
00787 
00788         { "_",        "",       "f" },
00789         { "a",        "",       "f" },
00790         { "a",        "b",      "f" },
00791         { "b",        "ab",     "f" },
00792         { "a",        "ab",     "f" },
00793         { "__",       "a",      "f" },
00794         { "abc",      "Abc",    "f" },
00795         { "abc",      "aBc",    "f" },
00796         { "abc",      "abC",    "f" },
00797         { "_bc",      "aBc",    "f" },
00798         { "_bc",      "abC",    "f" },
00799         { "a_c",      "Abc",    "f" },
00800         { "ab_",      "aBc",    "f" },
00801         { "a__",      "Abc",    "f" },
00802         { "_b_",      "aBc",    "f" },
00803         { "__c",      "abC",    "f" },
00804 
00805         // [a], [ab], [a-c]
00806         // SQL2003 Part 2 Section 8.6 General Rule 7.j
00807         // SQL2003 Part 2 Section 8.6 General Rule 5.a & 5.b
00808         // (General Rule 5b is tested throughout below)
00809         { "[a]",       "a",     "t" },
00810         { "[ab]",      "a",     "t" },
00811         { "[ab]",      "b",     "t" },
00812         { "[a-c]",     "a",     "t" },
00813         { "[a-c]",     "b",     "t" },
00814         { "[a-c]",     "c",     "t" },
00815 
00816         { "[a]",       "",      "f" },
00817         { "[a]",       "b",     "f" },
00818         { "[a]",       "ab",    "f" },
00819         { "[ab]",      "",      "f" },
00820         { "[ab]",      "c",     "f" },
00821         { "[ab]",      "ab",    "f" },
00822         { "[a-c]",     "",      "f" },
00823         { "[a-c]",     "Z",     "f" },
00824         { "[a-c]",     "d",     "f" },
00825 
00826         // [^a], [^ab], [^a-c]
00827         // SQL2003 Part 2 Section 8.6 General Rule 7.k
00828         // SQL2003 Part 2 Section 8.6 General Rule 5.a & 5.b
00829         { "[^a]",      "b",     "t" },
00830         { "[^ab]",     "c",     "t" },
00831         { "[^a-c]",    "d",     "t" },
00832 
00833         { "[^a]",      "",      "f" },
00834         { "[^a]",      "a",     "f" },
00835         { "[^a]",      "ab",    "f" },
00836         { "[^ab]",     "",      "f" },
00837         { "[^ab]",     "a",     "f" },
00838         { "[^ab]",     "b",     "f" },
00839         { "[^ab]",     "ab",    "f" },
00840         { "[^a-c]",    "",      "f" },
00841         { "[^a-c]",    "a",     "f" },
00842         { "[^a-c]",    "b",     "f" },
00843         { "[^a-c]",    "c",     "f" },
00844         { "[^a-c]",    "ab",    "f" },
00845 
00846         // [a^b], [a-c^d-f]
00847         // SQL2003 Part 2 Section 8.6 General Rule 7.l (7L)
00848         // boost regex does not support this
00849         // SqlSimilarPrep does not currently have a workaround.
00850         // TODO: Add a workaround in SqlSimilarPrep to allow this to work
00851 #if 0
00852         { "[a^b]",     "ac",    "t" },
00853         { "[a^b]",     "aa",    "t" },
00854         { "[a-c^d-f]", "ad",    "t" },
00855         { "[a-c^d-f]", "cf",    "t" },
00856 
00857         { "[a^b]",     "",      "f" },
00858         { "[a^b]",     "a",     "f" },
00859         { "[a^b]",     "b",     "f" },
00860         { "[a^b]",     "bb",    "f" },
00861         { "[a^b]",     "ab",    "f" },
00862         { "[a^b]",     "acd",   "f" },
00863         { "[a-c^d-f]", "",      "f" },
00864         { "[a-c^d-f]", "aa",    "f" },
00865         { "[a-c^d-f]", "ag",    "f" },
00866         { "[a-c^d-f]", "ca",    "f" },
00867         { "[a-c^d-f]", "cg",    "f" },
00868         { "[a-c^d-f]", "ad",    "f" },
00869         { "[a-c^d-f]", "af",    "f" },
00870         { "[a-c^d-f]", "aaa",   "f" },
00871 #endif
00872 
00873         // SQL2003 Part 2 Section 8.6 General Rule 7.m
00874         { "[[:alpha:]]",  "a",  "t" },
00875         { "[[:ALPHA:]]",  "a",  "t" },
00876         { "[[:ALPHA:]]",  "A",  "t" },
00877         { "[^[:alpha:]]", "1",  "t" },
00878         { "[^[:ALPHA:]]", "1",  "t" },
00879 
00880         { "[[:ALPHA:]]",  "",   "f" },
00881         { "[[:ALPHA:]]",  " ",  "f" },
00882         { "[[:ALPHA:]]",  "\t", "f" },
00883         { "[[:ALPHA:]]",  "\n", "f" },
00884         { "[[:ALPHA:]]",  "1",  "f" },
00885         { "[[:ALPHA:]]",  "@",  "f" },
00886         { "[[:ALPHA:]]",  "a1", "f" },
00887         { "[[:ALPHA:]]",  "aa", "f" },
00888         { "[^[:ALPHA:]]", "a",  "f" },
00889         { "[^[:ALPHA:]]", "A",  "f" },
00890 
00891         // SQL2003 Part 2 Section 8.6 General Rule 7.n
00892         { "[[:upper:]]",  "A",  "t" },
00893         { "[[:UPPER:]]",  "A",  "t" },
00894         { "[^[:upper:]]", "1",  "t" },
00895         { "[^[:UPPER:]]", "1",  "t" },
00896         { "[^[:UPPER:]]", "a",  "t" },
00897 
00898         { "[[:UPPER:]]",  "",   "f" },
00899         { "[[:UPPER:]]",  " ",  "f" },
00900         { "[[:UPPER:]]",  "\t", "f" },
00901         { "[[:UPPER:]]",  "\n", "f" },
00902         { "[[:UPPER:]]",  "1",  "f" },
00903         { "[[:UPPER:]]",  "@",  "f" },
00904         { "[[:UPPER:]]",  "a",  "f" },
00905         { "[[:UPPER:]]",  "AA", "f" },
00906         { "[^[:UPPER:]]", "A",  "f" },
00907 
00908         // SQL2003 Part 2 Section 8.6 General Rule 7.o
00909         { "[[:lower:]]",  "a",  "t" },
00910         { "[[:LOWER:]]",  "a",  "t" },
00911         { "[^[:lower:]]", "1",  "t" },
00912         { "[^[:LOWER:]]", "1",  "t" },
00913         { "[^[:LOWER:]]", "A",  "t" },
00914 
00915         { "[[:LOWER:]]",  "",   "f" },
00916         { "[[:LOWER:]]",  " ",  "f" },
00917         { "[[:LOWER:]]",  "\t", "f" },
00918         { "[[:LOWER:]]",  "\n", "f" },
00919         { "[[:LOWER:]]",  "1",  "f" },
00920         { "[[:LOWER:]]",  "@",  "f" },
00921         { "[[:LOWER:]]",  "A",  "f" },
00922         { "[[:LOWER:]]",  "aa", "f" },
00923         { "[^[:LOWER:]]", "a",  "f" },
00924 
00925         // SQL2003 Part 2 Section 8.6 General Rule 7.p
00926         { "[[:digit:]]",  "1",  "t" },
00927         { "[[:DIGIT:]]",  "1",  "t" },
00928         { "[^[:digit:]]", "a",  "t" },
00929         { "[^[:DIGIT:]]", "a",  "t" },
00930         { "[^[:DIGIT:]]", "a",  "t" },
00931 
00932         { "[[:DIGIT:]]",  "",   "f" },
00933         { "[[:DIGIT:]]",  " ",  "f" },
00934         { "[[:DIGIT:]]",  "\t", "f" },
00935         { "[[:DIGIT:]]",  "\n", "f" },
00936         { "[[:DIGIT:]]",  "a",  "f" },
00937         { "[[:DIGIT:]]",  "@",  "f" },
00938 
00939         // SQL2003 Part 2 Section 8.6 General Rule 7.q
00940         { "[[:space:]]",  " ",  "t" },
00941         { "[[:SPACE:]]",  " ",  "t" },
00942         { "[^[:space:]]", "a",  "t" },
00943         { "[^[:SPACE:]]", "a",  "t" },
00944         { "[^[:SPACE:]]", "\t", "t" },
00945         { "[^[:SPACE:]]", "\n", "t" },
00946 
00947         { "[[:SPACE:]]",  "",   "f" },
00948         { "[^[:SPACE:]]", " ",  "f" },
00949         { "[^[:SPACE:]]", "  ", "f" },
00950         { "[[:SPACE:]]",  "\t", "f" },
00951         { "[[:SPACE:]]",  "\n", "f" },
00952         { "[[:SPACE:]]",  "a",  "f" },
00953         { "[[:SPACE:]]",  "@",  "f" },
00954 
00955 
00956         // SQL2003 Part 2 Section 8.6 General Rule 7.r
00957         { "[[:whitespace:]]",  " ",  "t" },
00958         { "[[:WHITESPACE:]]",  " ",  "t" },
00959         { "[[:WHITESPACE:]]",  "\t",  "t" },
00960         { "[[:WHITESPACE:]]",  "\n",  "t" },
00961         { "[[:WHITESPACE:]]",  "\v",  "t" },
00962         { "[[:WHITESPACE:]]",  "\f",  "t" },
00963         { "[[:WHITESPACE:]]",  "\r",  "t" },
00964         { "[[:WHITESPACE:]]",  "\x20",  "t" },
00965         { "[[:WHITESPACE:]]",  "\xa0",  "t" },
00966         { "[[:WHITESPACE:]]",  "\x09",  "t" },
00967         { "[[:WHITESPACE:]]",  "\x0a",  "t" },
00968         { "[[:WHITESPACE:]]",  "\x0b",  "t" },
00969         { "[[:WHITESPACE:]]",  "\x0c",  "t" },
00970         { "[[:WHITESPACE:]]",  "\x0d",  "t" },
00971         { "[[:WHITESPACE:]]",  "\x85",  "t" },
00972         { "[^[:whitespace:]]", "a",  "t" },
00973         { "[^[:WHITESPACE:]]", "a",  "t" },
00974 
00975         { "[^[:WHITESPACE:]]", "\t", "f" },
00976         { "[^[:WHITESPACE:]]", "\n", "f" },
00977         { "[[:WHITESPACE:]]",  "",   "f" },
00978         { "[^[:WHITESPACE:]]", " ",  "f" },
00979         { "[^[:WHITESPACE:]]", "  ", "f" },
00980         { "[[:WHITESPACE:]]",  "a",  "f" },
00981         { "[[:WHITESPACE:]]",  "@",  "f" },
00982 
00983         // SQL2003 Part 2 Section 8.6 General Rule 7.s
00984         { "[[:alnum:]]",  "a",  "t" },
00985         { "[[:ALNUM:]]",  "a",  "t" },
00986         { "[[:ALNUM:]]",  "1",  "t" },
00987         { "[[:ALNUM:]]",  "A",  "t" },
00988         { "[^[:alnum:]]", "!",  "t" },
00989         { "[^[:ALNUM:]]", "!",  "t" },
00990         { "[^[:ALNUM:]]", " ",  "t" },
00991         { "[^[:ALNUM:]]", "\t", "t" },
00992         { "[^[:ALNUM:]]", "\n", "t" },
00993 
00994         { "[[:ALNUM:]]",  "",   "f" },
00995         { "[[:ALNUM:]]",  " ",  "f" },
00996         { "[[:ALNUM:]]",  "\t", "f" },
00997         { "[[:ALNUM:]]",  "\n", "f" },
00998         { "[^[:ALNUM:]]", "1",  "f" },
00999         { "[^[:ALNUM:]]", "a",  "f" },
01000         { "[^[:ALNUM:]]", "A",  "f" },
01001         { "[[:ALNUM:]]",  "aa", "f" },
01002 
01003         // SQL2003 Part 2 Section 8.6 General Rule 7.t
01004         // TODO: Understand and implement 7.t. (Confused.)
01005 #if 0
01006         { "||",       "a",      "t" },
01007         { "||",       "aa",     "f" },
01008 #endif
01009 
01010         // SQL2003 Part 2 Section 8.6 General Rule 7.u
01011         { "",         "",       "t" },
01012         { "",         "a",      "f" },
01013 
01014 
01015         // search for characters special to regex, but not to SQL
01016         { "\\",       "\\",     "t" },
01017         { "\\",       "a",      "f" },
01018         { "$",        "$",      "t" },
01019         { "a$c",      "a$c",    "t" },
01020         { "a$c",      "abc",    "f" },
01021         { ".",        ".",      "t" },
01022         { ".",        "a",      "f" },
01023 
01024         { "X",  "X",  "X" }  // end sentinel
01025     };
01026     string expPat;
01027     for (i = 0; *test[i][0] != 'X'; i++) {
01028         BOOST_MESSAGE(" ===== Ascii Similar " << i <<
01029                       " " << test[i][0] << " " <<
01030                       test[i][1] << " " << test[i][2]);
01031         try {
01032             SqlSimilarPrep<1, 1>(
01033                 test[i][0],
01034                 strlen(test[i][0]),
01035                 0, 0,   // no escape
01036                 expPat);
01037 
01038             boost::regex exp(expPat);
01039             result = SqlRegExp<1, 1>(
01040                 test[i][1],
01041                 strlen(test[i][1]),
01042                 strlen(test[i][0]),
01043                 exp);
01044         } catch (char const * const ptr) {
01045             // unexpected exception
01046             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
01047             BOOST_CHECK(0);
01048         } catch (boost::bad_expression badexp) {
01049             // regex format problem
01050             BOOST_MESSAGE("unexpected regex exception: "
01051                           <<badexp.what());
01052             BOOST_CHECK(0);
01053         } catch (...) {
01054             // unexpected exception
01055             BOOST_MESSAGE("unexpected unknown exception");
01056             BOOST_CHECK(0);
01057         }
01058 
01059         if (*(test[i][2]) == 't') {
01060             if (!result) {
01061                 BOOST_MESSAGE("|" << test[i][1] <<
01062                               "| |" << test[i][0] << "| expPat=|" <<
01063                               expPat << "|");
01064             }
01065             BOOST_CHECK(result);
01066         } else {
01067             if (result) {
01068                 BOOST_MESSAGE("|" << test[i][1] <<
01069                               "| |" << test[i][0] << "| expPat=|" <<
01070                               expPat << "|");
01071             }
01072             BOOST_CHECK(!result);
01073         }
01074     }
01075 }
01076 
01077 void
01078 SqlRegExpTest::testSqlRegExpSimilarAsciiEscape()
01079 {
01080     bool result = false;
01081     int i;
01082 
01083     const char* test[][4] = {
01084         // pattern, matchValue, escape
01085 
01086         // define a new escape
01087         { "_",       "a",       "#", "t" },
01088         { "#_",      "_",       "#", "t" },
01089         { "#_bc",    "_bc",     "#", "t" },
01090         { "a#_c",    "a_c",     "#", "t" },
01091         { "ab#_",    "ab_",     "#", "t" },
01092         { "#%",      "%",       "#", "t" },
01093         { "#%bc",    "%bc",     "#", "t" },
01094         { "a#%c",    "a%c",     "#", "t" },
01095         { "ab#%",    "ab%",     "#", "t" },
01096         { "%",       "a",       "#", "t" },
01097         { "#%",      "%",       "#", "t" },
01098         { "##",      "#",       "#", "t" },
01099         // try all special chars (both to SIMILAR & regex)
01100         {
01101             "#[#]#(#)#|#^#-#+#*#_#%#?#{#}$.\\",
01102             "[]()|^-+*_%?{}$.\\",  "#", "t"
01103         },
01104         {
01105             "#[#{#(#|#?#^#*#%#+#-#_#)#}#]$.\\",
01106             "[{(|?^*%+-_)}]$.\\",  "#", "t"
01107         },
01108 
01109 
01110         { "#%",      "a",       "#", "f" },
01111         { "##",      "a",       "#", "f" },
01112         { "#%",      "ab",      "#", "f" },
01113         { "#%",      "a",       "#", "f" },
01114         { "#%bc",    "abc",     "#", "f" },
01115         { "a#%c",    "abc",     "#", "f" },
01116         { "ab#%",    "abc",     "#", "f" },
01117 
01118         { "_",       "ab",      "#", "f" },
01119         { "#_",      "_a",      "#", "f" },
01120         { "#_",      "a_",      "#", "f" },
01121         { "#_",      "a",       "#", "f" },
01122         { "#_",      "__",      "#", "f" },
01123         { "#_",      "a",       "#", "f" },
01124         { "#_#_",    "a",       "#", "f" },
01125         { "#_#_",    "_",       "#", "f" },
01126         { "#_#_",    "_a",      "#", "f" },
01127         { "#_#_",    "a_",      "#", "f" },
01128         { "#_bc",    "abc",     "#", "f" },
01129         { "a#_c",    "abc",     "#", "f" },
01130         { "ab#_",    "abc",     "#", "f" },
01131 
01132         // define new escape that is special regexp char
01133         { "_",       "a",       "|", "t" },
01134         { "|_",      "_",       "|", "t" },
01135         { "|_bc",    "_bc",     "|", "t" },
01136         { "a|_c",    "a_c",     "|", "t" },
01137         { "ab|_",    "ab_",     "|", "t" },
01138         { "||",      "|",       "|", "t" },
01139         { "((",      "(",       "(", "t" },
01140 
01141         { "|%",      "%",       "|", "t" },
01142         { "|%bc",    "%bc",     "|", "t" },
01143         { "a|%c",    "a%c",     "|", "t" },
01144         { "ab|%",    "ab%",     "|", "t" },
01145         { "%",       "a",       "|", "t" },
01146         { "|%",      "%",       "|", "t" },
01147 
01148         // try a other special chars as escape
01149         {
01150             "[[[][([)[|[^[-[+[*[_[%[?[{[}$.\\",
01151             "[]()|^-+*_%?{}$.\\", "[", "t"
01152         },
01153         {
01154             "[[[{[([|[?[^[*[%[+[-[_[)[}[]$.\\",
01155             "[{(|?^*%+-_)}]$.\\", "[", "t"
01156         },
01157         {
01158             "][]]](])]|]^]-]+]*]_]%]?]{]}$.\\",
01159             "[]()|^-+*_%?{}$.\\", "]", "t"
01160         },
01161         {
01162             ".[.].(.).|.^.-.+.*._.%.?.{.}$\\",
01163             "[]()|^-+*_%?{}$\\", ".", "t"
01164         },
01165         {
01166             "*[*]*(*)*|*^*-*+***_*%*?*{*}$.\\",
01167           "[]()|^-+*_%?{}$.\\", "*", "t"
01168         },
01169         {
01170             "_[_]_(_)_|_^_-_+_*___%_?_{_}$.\\",
01171             "[]()|^-+*_%?{}$.\\", "_", "t"
01172         },
01173         {
01174             "%[%]%(%)%|%^%-%+%*%_%%%?%{%}$.\\",
01175             "[]()|^-+*_%?{}$.\\", "%", "t"
01176         },
01177 
01178         { "_",       "ab",      "|", "f" },
01179         { "|_",      "_a",      "|", "f" },
01180         { "|_",      "a_",      "|", "f" },
01181         { "|_",      "a",       "|", "f" },
01182         { "|%",      "a",       "|", "f" },
01183         { "|%",      "ab",      "|", "f" },
01184         { "||",      "a",       "|", "f" },
01185         { "((",      "a",       "(", "f" },
01186 
01187         { "X",       "X",       "X", "X" }  // end sentinel
01188     };
01189     string expPat;
01190     for (i = 0; *test[i][0] != 'X'; i++) {
01191         BOOST_MESSAGE(" ========== escape " << i << " " <<test[i][0] <<
01192                       test[i][1] << " " << test[i][2] << " "
01193                       << test[i][3]);
01194         try {
01195             SqlSimilarPrep<1,1>(
01196                 test[i][0],
01197                 strlen(test[i][0]),
01198                 test[i][2],
01199                 strlen(test[i][2]),
01200                 expPat);
01201             boost::regex exp(expPat);
01202             result = SqlRegExp<1,1>(
01203                 test[i][1],
01204                 strlen(test[i][1]),
01205                 strlen(test[i][0]),
01206                 exp);
01207         } catch (char const * const ptr) {
01208             // unexpected exception
01209             BOOST_MESSAGE("unexpected SQL exception: " << ptr);
01210             BOOST_CHECK(0);
01211         } catch (boost::bad_expression badexp) {
01212             // regex format problem
01213             BOOST_MESSAGE("unexpected regex exception: "
01214                           <<badexp.what());
01215             BOOST_CHECK(0);
01216         } catch (...) {
01217             // unexpected exception
01218             BOOST_MESSAGE("unexpected unknown exception");
01219             BOOST_CHECK(0);
01220         }
01221 
01222 
01223         if (*(test[i][3]) == 't') {
01224             if (!result) {
01225                 BOOST_MESSAGE("|" << test[i][1] <<
01226                               "| |" << test[i][0] << "| expPat=|" <<
01227                               expPat << "|");
01228             }
01229             BOOST_CHECK(result);
01230         } else {
01231             if (result) {
01232                 BOOST_MESSAGE("|" << test[i][1] <<
01233                               "| |" << test[i][0] << "| expPat=|" <<
01234                               expPat << "|");
01235             }
01236             BOOST_CHECK(!result);
01237         }
01238     }
01239 }
01240 
01241 void
01242 SqlRegExpTest::testSqlRegExpSimilarAsciiException()
01243 {
01244     bool caught = false;
01245     bool result = false;
01246     int i;
01247 
01248     const char* test[][4] = {
01249         // pattern, matchValue, escape, exception
01250         { "[[:ALPHA:]]", "a",   ":",     "2200B" },
01251         { "[[:alpha:]]", "a",   ":",     "2200B" },
01252 
01253         { "a",    "a",   "ab",    "22019" },
01254         { "a",    "a",   "\\\\",  "22019" },
01255 
01256         // escape char at end of string
01257         { "=",    "a",   "=",     "2201B" },
01258         // escaping a non-special char
01259         { "=a",   "a",   "=",     "2201B" },
01260         // invalid regular character set name
01261         { "[[:foo:]]", "a",   "=",     "2201B" },
01262         // mixed case regular character set name
01263         // (code allows lower case, outside of standard)
01264         { "[[:Alnum:]]", "a",   "=",     "2201B" },
01265         // sql-only special character in character set
01266         { "[_]",  "a",   "=",     "2201B" },
01267         { "[a_]", "a",   "=",     "2201B" },
01268         { "[%]",  "a",   "=",     "2201B" },
01269         { "[a%]", "a",   "=",     "2201B" },
01270         // regex & sql special character in character set
01271         { "[[]",  "a",   "=",     "2201B" }, // also opening w/o close
01272         { "[]]",  "a",   "=",     "2201B" }, // also close w/o open
01273         { "[(]",  "a",   "=",     "2201B" },
01274         { "[)]",  "a",   "=",     "2201B" },
01275         { "[|]",  "a",   "=",     "2201B" },
01276         { "[^]",  "a",   "=",     "regex" }, // thrown by regex
01277         // TODO: ? Could make this work, but seems at vanishing point of
01278         // utility. BNF says this is not legal.
01279         //{ "[-]",  "a",   "=",     "2201B" }, // technically should be caught
01280         { "[+]",  "a",   "=",     "2201B" },
01281         { "[*]",  "a",   "=",     "2201B" },
01282         { "[_]",  "a",   "=",     "2201B" },
01283         { "[?]",  "a",   "=",     "2201B" },
01284         { "[{]",  "a",   "=",     "2201B" },
01285         { "[}]",  "a",   "=",     "2201B" },
01286 
01287 
01288         { "[a]",  "a",   "[",    "2200C" },
01289         { "[a]",  "a",   "]",    "2200C" },
01290         { "(a)",  "a",   "(",    "2200C" },
01291         { "(a)",  "a",   ")",    "2200C" },
01292         { "a|b",  "a",   "|",    "2200C" },
01293         { "[^a]", "a",   "^",    "2200C" },
01294         { "[a-b]","a",   "-",    "2200C" },
01295         { "(a)+", "a",   "+",    "2200C" },
01296         { "(a)*", "a",   "*",    "2200C" },
01297         { "a_",   "a",   "_",    "2200C" },
01298         { "a%",   "a",   "%",    "2200C" },
01299 
01300         { "X",    "X",   "X",     "X" }  // end sentinel
01301     };
01302     string expPat;
01303     for (i = 0; *test[i][0] != 'X'; i++) {
01304         BOOST_MESSAGE(" ===== exception " << i << " " <<test[i][0]);
01305         caught = false;
01306         try {
01307             SqlSimilarPrep<1,1>(
01308                 test[i][0],
01309                 strlen(test[i][0]),
01310                 test[i][2],
01311                 strlen(test[i][2]),
01312                 expPat);
01313             boost::regex exp(expPat);
01314             result = SqlRegExp<1, 1>(
01315                 test[i][1],
01316                 strlen(test[i][1]),
01317                 strlen(test[i][0]),
01318                 exp);
01319         } catch (char const * const ex) {
01320             caught = true;
01321             if (strcmp(ex, test[i][3])) {
01322                 BOOST_MESSAGE(
01323                     test[i][0] << " " << test[i][1]
01324                     << " " << test[i][2] << " expected: |"
01325                     << test[i][3] << "| got: |" << ex << "|");
01326                 BOOST_CHECK(0);
01327             }
01328         } catch (boost::bad_expression badexp) {
01329             // regex format problem
01330             BOOST_MESSAGE("got boost exception " << test[i][3]);
01331             if (!strcmp("regex", test[i][3])) {
01332                 BOOST_MESSAGE("setting caught to true");
01333                 caught = true;
01334             } else {
01335                 BOOST_MESSAGE("unexpected regex exception: "
01336                               << badexp.what());
01337                 BOOST_CHECK(0);
01338             }
01339         } catch (...) {
01340             // unexpected exception
01341             BOOST_MESSAGE("unexpected unknown exception");
01342             BOOST_CHECK(0);
01343         }
01344 
01345         if (!caught) {
01346             BOOST_CHECK(0);
01347         }
01348     }
01349 }
01350 
01351 
// Presumably hooks SqlRegExpTest into the unit-test harness.
// NOTE(review): FENNEL_UNIT_TEST_SUITE is defined outside this file
// (likely via fennel/test/TestBase.h) -- confirm what it expands to.
01352 FENNEL_UNIT_TEST_SUITE(SqlRegExpTest);
01353 
01354 // End SqlRegExpTest.cpp

Generated on Mon Jun 22 04:00:13 2009 for Fennel by  doxygen 1.5.1