SqlString.h

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/calculator/SqlString.h#4 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2005-2009 The Eigenbase Project
00005 // Copyright (C) 2004-2009 SQLstream, Inc.
00006 // Copyright (C) 2009-2009 LucidEra, Inc.
00007 //
00008 // This program is free software; you can redistribute it and/or modify it
00009 // under the terms of the GNU General Public License as published by the Free
00010 // Software Foundation; either version 2 of the License, or (at your option)
00011 // any later version approved by The Eigenbase Project.
00012 //
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public License for more details.
00017 //
00018 // You should have received a copy of the GNU General Public License
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 //
00022 // SqlString
00023 //
00024 // An ASCII & UCS2 string library that adheres to the SQL99 standard definitions
00025 */
00026 #ifndef Fennel_SqlString_Included
00027 #define Fennel_SqlString_Included
00028 
#include <stdint.h>

#include <limits>

#ifdef HAVE_ICU
#include <unicode/ustring.h>
#endif
00034 
00035 FENNEL_BEGIN_NAMESPACE
00036 
00037 #if !(defined LITTLEENDIAN || defined BIGENDIAN)
00038 #error "endian not defined"
00039 #endif
00040 
00087 
00088 
00089 
00090 
00091 
00092 
00093 
00094 
00095 
00096 
00097 
00098 
00099 
00100 
00101 
00102 
00103 
00104 
00105 
00106 
// SqlStrCat, append form (declaration only; the definition is not in this
// header).
// NOTE(review): judging by the parameter names, dest is a buffer of
// destStorageBytes bytes that already holds destLenBytes bytes, and
// str/strLenBytes is the string appended to it -- confirm against the
// definition. The meaning of the int return value is not visible here.
00107 //
00108 //  TODO: Does not implement an implementation defined max length.
00109 int FENNEL_CALCULATOR_EXPORT
00110 SqlStrCat(
00111     char* dest,
00112     int destStorageBytes,
00113     int destLenBytes,
00114     char const * const str,
00115     int strLenBytes);
00116 
// SqlStrCat, two-source form (declaration only; the definition is not in
// this header).
// NOTE(review): judging by the parameter names, str1 followed by str2 is
// written into dest, a buffer of destStorageBytes bytes -- confirm against
// the definition. The meaning of the int return value is not visible here.
00135 //
00136 //  TODO: Does not implement an implementation defined max length.
00137 int FENNEL_CALCULATOR_EXPORT
00138 SqlStrCat(
00139     char* dest,
00140     int destStorageBytes,
00141     char const * const str1,
00142     int str1LenBytes,
00143     char const * const str2,
00144     int str2LenBytes);
00145 
// SqlStrCmp_Bin (declaration only; the definition is not in this header).
// Takes two (pointer, length-in-bytes) strings and returns an int.
// SqlStrCmp in this header forwards to it after excluding trailing pad
// characters from both lengths.
// NOTE(review): presumably a byte-wise comparison with a memcmp-like sign
// convention -- confirm against the definition.
00152 int FENNEL_CALCULATOR_EXPORT
00153 SqlStrCmp_Bin(
00154     char const * const str1,
00155     int str1LenBytes,
00156     char const * const str2,
00157     int str2LenBytes);
00158 
00186 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
00187 int
00188 SqlStrCmp(
00189     char const * const str1,
00190     int str1LenBytes,
00191     char const * const str2,
00192     int str2LenBytes,
00193     int trimchar = ' ')
00194 {
00195     assert(str1LenBytes >= 0);
00196     assert(str2LenBytes >= 0);
00197 
00198     if (CodeUnitBytes == MaxCodeUnitsPerCodePoint) {
00199         if (CodeUnitBytes == 1) {
00200             char const * start = str1;
00201             char const * end = str1 + str1LenBytes;
00202             int str1TrimLenBytes;
00203             int str2TrimLenBytes;
00204 
00205             if (end != start) {
00206                 end--;
00207                 while (end != start && *end == trimchar) {
00208                     end--;
00209                 }
00210                 if (end != start || *end != trimchar) {
00211                     end++;
00212                 }
00213             }
00214             str1TrimLenBytes = end - start;
00215 
00216             start = str2;
00217             end = str2 + str2LenBytes;
00218 
00219             if (end != start) {
00220                 end--;
00221                 while (end != start && *end == trimchar) {
00222                     end--;
00223                 }
00224                 if (end != start || *end != trimchar) {
00225                     end++;
00226                 }
00227             }
00228             str2TrimLenBytes = end - start;
00229             return SqlStrCmp_Bin(
00230                 str1, str1TrimLenBytes,
00231                 str2, str2TrimLenBytes);
00232 #if 0
00233             int minLenBytes = str1TrimLenBytes > str2TrimLenBytes ?
00234                 str2TrimLenBytes : str1TrimLenBytes;
00235 
00236 
00237             // To allow 0, "Null", values in string, uses memcmp over
00238             // strcmp. Not strictly needed, may have some future value.
00239             // First, check for differences in "common" length. If same
00240             // values, declare the longer string be declared "larger".
00241             int memc = memcmp(str1, str2, minLenBytes);
00242             if (memc > 0) {
00243                 // Normalize to -1, 0, 1
00244                 return 1;
00245             } else if (memc < 0) {
00246                 // Normalize to -1, 0, 1
00247                 return -1;
00248             } else if (str1TrimLenBytes == str2TrimLenBytes) {
00249                 // memc == 0
00250                 // Equal length & contain same data -> equal
00251                 return 0;
00252             } else if (str1TrimLenBytes > str2TrimLenBytes) {
00253                 return 1;
00254             } else {
00255                 return -1;
00256             }
00257 #endif
00258         } else if (CodeUnitBytes == 2) {
00259             // TODO: Add UCS2 here
00260             throw std::logic_error("no UCS2");
00261         } else {
00262             throw std::logic_error("no such encoding");
00263         }
00264     } else {
00265         throw std::logic_error("no UTF8/16/32");
00266     }
00267     return 0;
00268 }
00269 
//! Copies str into a fixed-width destination and fills the remainder of the
//! destination storage with padchar.
//!
//! @param dest destination buffer of destStorageBytes bytes
//! @param destStorageBytes size of dest in bytes
//! @param str source string (need not be null terminated)
//! @param strLenBytes length of str in bytes
//! @param padchar pad character; for 2-byte code units its two bytes are
//!        stored in the order selected by LITTLEENDIAN
//! @return destStorageBytes (the whole destination is always filled)
//! @throw "22001" (char const *) if str does not fit in dest
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrCpy_Fix(
    char* dest,
    int destStorageBytes,
    char const * const str,
    int strLenBytes,
    int padchar = ' ')
{
    if (destStorageBytes < strLenBytes) {
        // SQL99 22.1 22-001 "String Data Right truncation"
        throw "22001";
    }
    memcpy(dest, str, strLenBytes);

    if (MaxCodeUnitsPerCodePoint != 1) {
        throw std::logic_error("no UTF8/16/32");
    }

    // Region of dest that still has to be padded.
    char * const padBegin = dest + strLenBytes;
    char * const padEnd = dest + destStorageBytes;

    switch (CodeUnitBytes) {
    case 1:
        // ASCII: one byte per pad character
        memset(padBegin, padchar, padEnd - padBegin);
        break;
    case 2: {
        // UCS2: write the pad character as a byte pair
        assert(!(destStorageBytes & 1));
        assert(!(strLenBytes & 1));
        char const highByte = (padchar >> 8) & 0xff;
        char const lowByte = padchar & 0xff;
#ifdef LITTLEENDIAN
        char const first = lowByte;
        char const second = highByte;
#else
        char const first = highByte;
        char const second = lowByte;
#endif
        assert(!((padEnd - padBegin) & 1));
        for (char *p = padBegin; p < padEnd; p += 2) {
            p[0] = first;
            p[1] = second;
        }
        break;
    }
    default:
        throw std::logic_error("no UCS-4");
    }
    return destStorageBytes;
}
00324 
00325 
// SqlStrCpy_Var (declaration only; the definition is not in this header).
// NOTE(review): by its name and parameters this looks like the
// variable-width counterpart of SqlStrCpy_Fix, i.e. a copy of str into dest
// without padding -- confirm against the definition. The meaning of the int
// return value is not visible here.
00333 int FENNEL_CALCULATOR_EXPORT
00334 SqlStrCpy_Var(
00335     char* dest,
00336     int destStorageBytes,
00337     char const * const str,
00338     int strLenBytes);
00339 
00340 
// SqlStrLenBit (declaration only; the definition is not in this header).
// NOTE(review): presumably returns the length in bits of a string that is
// strLenBytes bytes long -- confirm against the definition.
00344 int FENNEL_CALCULATOR_EXPORT
00345 SqlStrLenBit(int strLenBytes);
00346 
00347 
//! Returns the length of a string in characters.
//!
//! For the single-byte encoding <1, 1> this is the byte length; for UCS2
//! <2, 1> it is half the byte length.
//!
//! @param str the string (not examined by the implemented encodings)
//! @param strLenBytes length of the string in bytes; must be even for UCS2
//! @return number of characters
//! @throw std::logic_error for any other encoding
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrLenChar(
    char const * const str,
    int strLenBytes)
{
    if (CodeUnitBytes == 1 && MaxCodeUnitsPerCodePoint == 1) {
        // ASCII
        return strLenBytes;
    } else if (CodeUnitBytes == 2 && MaxCodeUnitsPerCodePoint == 1) {
        // UCS2: two bytes per character
        assert(!(strLenBytes & 1));
        return strLenBytes >> 1;
    } else {
        throw std::logic_error("no UTF8/16/32");
    }
}
00368 
// SqlStrLenOct (declaration only; the definition is not in this header).
// NOTE(review): presumably returns the length in octets of a string that is
// strLenBytes bytes long -- confirm against the definition.
00372 int FENNEL_CALCULATOR_EXPORT
00373 SqlStrLenOct(int strLenBytes);
00374 
//! Writes into dest a copy of str in which a span of characters has been
//! replaced by over.
//!
//! The result is: the first (startChar - 1) bytes of str, then over, then
//! whatever remains of str after skipping lenChar further bytes. Only the
//! single-byte encoding <1, 1> is implemented.
//!
//! @param dest destination buffer
//! @param destStorageBytes size of dest in bytes
//! @param str source string
//! @param strLenBytes length of str in bytes
//! @param over replacement string
//! @param overLenBytes length of over in bytes
//! @param startChar 1-based position in str where the replaced span begins
//! @param lenChar number of characters of str to replace; ignored unless
//!        lenSpecified is non-zero
//! @param lenSpecified when zero, lenChar defaults to overLenBytes
//! @return number of bytes written to dest
//! @throw "22011" (char const *) if lenChar < 0 or startChar < 1
//! @throw "22001" (char const *) if the result does not fit in dest
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrOverlay(
    char* dest,
    int destStorageBytes,
    char const * const str,
    int strLenBytes,
    char const * const over,
    int overLenBytes,
    int startChar,
    int lenChar,
    int lenSpecified)
{
    // Dispatch on "fixed width" first, consistent with the other routines
    // in this file, so each unsupported encoding gets the right message.
    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            if (!lenSpecified) {
                lenChar = overLenBytes;
            }
            if (lenChar < 0 || startChar < 1) {
                // Overlay is defined in terms of substring. These conditions
                // would, I believe, generate a substring error. Also
                // another "reference" sql database gets angry under these
                // conditions. Therefore:
                // Per SQL99 Part 2 Section 6.18 General Rule 3.d, generate a
                // "data exception substring error". SQL99 22.1 22-011
                throw "22011";
            }

            int leftLenBytes = startChar - 1;         // 1-index to 0-index
            if (leftLenBytes > strLenBytes) {
                leftLenBytes = strLenBytes;
            }

            // Bytes of str that survive to the right of the replaced span.
            // Computed by subtraction from what is left of str so that a
            // huge lenChar can neither overflow an int nor produce a
            // pointer beyond the end of str.
            int rightLenBytes = strLenBytes - leftLenBytes;
            if (lenChar >= rightLenBytes) {
                rightLenBytes = 0;
            } else {
                rightLenBytes -= lenChar;
            }
            char const *rightP = str + (strLenBytes - rightLenBytes);

            assert(leftLenBytes >= 0);
            assert(rightLenBytes >= 0);
            assert(rightP >= str);

            // Written as a subtraction so the size check cannot overflow.
            if (destStorageBytes - leftLenBytes - rightLenBytes
                < overLenBytes)
            {
                // SQL99 22.1 22-001 "String Data Right truncation"
                throw "22001";
            }

            char *dp = dest;

            memcpy(dp, str, leftLenBytes);
            dp += leftLenBytes;
            memcpy(dp, over, overLenBytes);
            dp += overLenBytes;
            memcpy(dp, rightP, rightLenBytes);
            dp += rightLenBytes;

            return static_cast<int>(dp - dest);
        } else if (CodeUnitBytes == 2) {
            // TODO: Add UCS2 here
            throw std::logic_error("no UCS2");
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        throw std::logic_error("no UTF8/16/32");
    }
}
00452 
//! Finds the first occurrence of find within str.
//!
//! Only the single-byte encoding <1, 1> is implemented.
//!
//! @param str string to search
//! @param strLenBytes length of str in bytes
//! @param find string to look for
//! @param findLenBytes length of find in bytes
//! @return 1-based position of the first match; 1 if find is empty;
//!         0 if find is longer than str or does not occur in it
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrPos(
    char const * const str,
    int strLenBytes,
    char const * const find,
    int findLenBytes)
{
    // Dispatch on "fixed width" first, consistent with the other routines
    // in this file, so each unsupported encoding gets the right message.
    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // SQL99 Part 2 Section 6.17 General Rule 2.a.
            if (!findLenBytes) {
                return 1;
            }
            // SQL99 Part 2 Section 6.17 General Rule 2.c.
            if (findLenBytes > strLenBytes) {
                return 0;
            }
            assert(findLenBytes > 0);
            assert(strLenBytes > 0);
            assert(strLenBytes - findLenBytes >= 0);

            char const * s = str;
            // One past the last position at which a match could start.
            char const * const end = str + (strLenBytes - findLenBytes) + 1;

            while (s < end) {
                // search for first char of find
                s = static_cast<char const *>(memchr(s, *find, end - s));
                if (!s) {
                    return 0;                // Case C.
                }
                if (!memcmp(s, find, findLenBytes)) {
                    // add 1 to make result 1-indexed.
                    return static_cast<int>(s - str) + 1;   // Case B.
                }
                s++;
            }
            return 0;                            // Case C.
        } else if (CodeUnitBytes == 2) {
            // TODO: Add UCS2 here
            throw std::logic_error("no UCS2");
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        throw std::logic_error("no UTF8/16/32");
    }
}
00508 
//! Locates a substring of str without copying it.
//!
//! On a non-empty result *dest is set to point into str at the first byte
//! of the substring. Only the single-byte encoding <1, 1> is implemented.
//!
//! @param dest receives a pointer into str; left untouched when 0 is
//!        returned because the start lies beyond str or the span ends
//!        before position 1
//! @param destStorageBytes maximum length the caller can accept
//! @param str source string
//! @param strLenBytes length of str in bytes
//! @param subStartChar 1-based start position; may be < 1
//! @param subLenChar requested length; ignored unless subLenCharSpecified
//! @param subLenCharSpecified when zero the substring runs to the end of str
//! @return length of the substring in bytes
//! @throw "22011" (char const *) if the requested length is negative
//! @throw "22001" (char const *) if the result exceeds destStorageBytes
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrSubStr(
    char const ** dest,
    int destStorageBytes,
    char const * const str,
    int strLenBytes,
    int subStartChar,
    int subLenChar,
    int subLenCharSpecified)
{
    // Dispatch on "fixed width" first, consistent with the other routines
    // in this file, so each unsupported encoding gets the right message.
    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // e is the 1-based position one past the requested span. It is
            // kept in 64 bits so that start + length cannot overflow and be
            // mistaken for a negative length.
            int64_t e;
            if (subLenCharSpecified) {
                e = static_cast<int64_t>(subStartChar) + subLenChar;
            } else {
                e = static_cast<int64_t>(strLenBytes) + 1;
                if (subStartChar > e) {
                    e = subStartChar;
                }
            }

            if (e < subStartChar) {
                // Per SQL99 Part 2 Section 6.18 General Rule 3.d, generate a
                // "data exception substring error". SQL99 22.1 22-011
                throw "22011";
            }

            if (subStartChar > strLenBytes || e < 1) {
                return 0;
            }

            // Clip the requested span [subStartChar, e) to the string.
            int64_t s1 = 1;
            if (subStartChar > s1) {
                s1 = subStartChar;
            }
            int64_t e1 = static_cast<int64_t>(strLenBytes) + 1;
            if (e < e1) {
                e1 = e;
            }
            int64_t const l1 = e1 - s1;

            if (l1 > destStorageBytes) {
                // SQL99 22.1 22-001 "String Data Right truncation"
                throw "22001";
            }
            if (l1 < 0) {
                // Expected behavior not clear.
                // "data exception substring error". SQL99 22.1 22-011
                throw "22011";
            }

            // - 1 converts from 1-indexed to 0-indexed
            *dest = str + s1 - 1;
            // l1 is at most strLenBytes here, so it fits in an int.
            return static_cast<int>(l1);
        } else if (CodeUnitBytes == 2) {
            // TODO: Add UCS2 here
            throw std::logic_error("no UCS2");
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        throw std::logic_error("no UTF8/16/32");
    }
}
00588 
//! Selects the case conversion performed by SqlStrAlterCase.
enum SqlStrAlterCaseAction {
    AlterCaseUpper,   //!< convert to upper case
    AlterCaseLower    //!< convert to lower case
};

//! Writes an upper- or lower-cased copy of src into dest.
//!
//! For the single-byte encoding <1, 1> each byte is converted with
//! toupper()/tolower(). For UCS2 <2, 1> the conversion is delegated to ICU
//! (u_strToUpper / u_strToLower) and is only available when HAVE_ICU is
//! defined.
//!
//! @param dest destination buffer
//! @param destStorageBytes size of dest in bytes
//! @param src source string
//! @param srcLenBytes length of src in bytes
//! @param locale locale name handed to ICU on the UCS2 path, where it must
//!        be non-null (asserted); not used by the single-byte path
//! @return length in bytes of the converted string
//! @throw "22001" (char const *) if the result does not fit in dest
//! @throw char const * (ICU error name) on any other ICU failure
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes,
          int MaxCodeUnitsPerCodePoint,
          SqlStrAlterCaseAction Action>
int
SqlStrAlterCase(
    char* dest,
    int destStorageBytes,
    char const * const src,
    int srcLenBytes,
    char const * const locale = 0)
{
    int retVal;

    if (srcLenBytes > destStorageBytes) {
        // SQL99 22.1 22-001 "String Data Right truncation"
        throw "22001";
    }

    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            char const * s = src;
            char* d = dest;
            char* const e = dest + srcLenBytes;
            while (d < e) {
                // toupper()/tolower() are only defined for values
                // representable as unsigned char (or EOF), so a plain char
                // that may be negative must be converted first.
                int const c = static_cast<unsigned char>(*(s++));
                switch (Action) {
                case AlterCaseUpper:
                    *(d++) = static_cast<char>(toupper(c));
                    break;
                case AlterCaseLower:
                    *(d++) = static_cast<char>(tolower(c));
                    break;
                default:
                    throw std::logic_error("AlterCase Action");
                    break;
                }
            }
            retVal = srcLenBytes;
        } else if (CodeUnitBytes == 2) {
            // UCS2
#ifdef HAVE_ICU
            // NOTE(review): this only fires when BOTH lengths are odd; if
            // the intent is "both must be even" it should test
            // (destStorageBytes | srcLenBytes) -- confirm before tightening.
            assert(!(destStorageBytes & srcLenBytes & 1));
            // strings must be short aligned
            // TODO: Change tuples to force strings to be short aligned.
            // uintptr_t rather than int: a pointer does not fit in an int
            // on LP64 targets and the cast would not compile there.
            assert(!(reinterpret_cast<uintptr_t>(src) & 1));
            assert(!(reinterpret_cast<uintptr_t>(dest) & 1));
            assert(sizeof(UChar) == 2);
            assert(locale);   // Don't allow locale defaulting

            int32_t destStorageUChar = destStorageBytes >> 1;
            int32_t srcLenUChar = srcLenBytes >> 1;
            int32_t newLenUChar;
            UErrorCode errorCode = U_ZERO_ERROR;

            switch (Action) {
            case AlterCaseUpper:
                newLenUChar = u_strToUpper(
                    reinterpret_cast<UChar*>(dest),
                    destStorageUChar,
                    reinterpret_cast<UChar const *>(src),
                    srcLenUChar,
                    locale,
                    &errorCode);
                break;
            case AlterCaseLower:
                newLenUChar = u_strToLower(
                    reinterpret_cast<UChar*>(dest),
                    destStorageUChar,
                    reinterpret_cast<UChar const *>(src),
                    srcLenUChar,
                    locale,
                    &errorCode);
                break;
            default:
                throw std::logic_error("AlterCase Action");
                break;
            }

            if (newLenUChar > destStorageUChar) {
                // SQL99 22.1 22-001 "String Data Right truncation"
                throw "22001";
            }
            if (U_FAILURE(errorCode)) {
                // TODO: Clean up ICU error handling.
                // Other ICU error. Unlikely to occur?
                throw u_errorName(errorCode);
            }
            retVal = newLenUChar << 1;
#else
            throw std::logic_error("no UCS2");
#endif
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        // Note: Potentially UTF16 can be handled by UCS2 code
        throw std::logic_error("no UTF8/16/32");
    }
    return retVal;
}
00695 
//! Copies str into dest with leading and/or trailing pad characters removed.
//!
//! Implemented for the single-byte encoding <1, 1> and for UCS2 <2, 1>.
//! For UCS2 the pad character is matched as a byte pair whose order is
//! selected by LITTLEENDIAN.
//!
//! @param dest destination buffer
//! @param destStorageBytes size of dest in bytes
//! @param str source string
//! @param strLenBytes length of str in bytes, must be >= 0 (even for UCS2)
//! @param trimLeft non-zero to remove leading pad characters
//! @param trimRight non-zero to remove trailing pad characters
//! @param trimchar the pad character
//! @return number of bytes written to dest
//! @throw "22001" (char const *) if the trimmed string does not fit in dest
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrTrim(
    char* dest,
    int destStorageBytes,
    char const * const str,
    int strLenBytes,
    int trimLeft,
    int trimRight,
    int trimchar = ' ')
{
    char const * start = str;
    char const * end = str + strLenBytes;
    assert(strLenBytes >= 0);
    int newLenBytes;

    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // ASCII
            // If many pad characters are expected, consider using memrchr()
            if (trimLeft) {
                while (start != end && *start == trimchar) {
                    start++;
                }
            }
            if (trimRight) {
                // Walk end back over every trailing pad character.
                while (end != start && *(end - 1) == trimchar) {
                    end--;
                }
            }
            newLenBytes = static_cast<int>(end - start);
        } else if (CodeUnitBytes == 2) {
            // UCS2
            assert(!(strLenBytes & 1));
            char byte1, byte2;
#ifdef LITTLEENDIAN
            byte2 = (trimchar >> 8) & 0xff;
            byte1 = trimchar & 0xff;
#else
            byte1 = (trimchar >> 8) & 0xff;
            byte2 = trimchar & 0xff;
#endif
            if (trimLeft) {
                while (start < end && *start == byte1 && *(start+1) == byte2) {
                    start += 2;
                }
            }
            if (trimRight) {
                // Always compare the full byte pair. Comparing a single
                // byte against trimchar would leave one pad character
                // behind whenever the first stored byte differs from
                // trimchar (e.g. big-endian ' ').
                while (end - start >= 2
                       && *(end - 2) == byte1 && *(end - 1) == byte2)
                {
                    end -= 2;
                }
            }
            newLenBytes = static_cast<int>(end - start);
            assert(!(newLenBytes & 1));
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        // Note: Potentially UTF16 can be handled by UCS2 code
        throw std::logic_error("no UTF8/16/32");
    }

    if (newLenBytes > destStorageBytes) {
        // SQL99 22.1 22-001 "String Data Right truncation"
        throw "22001";
    }
    memcpy(dest, start, newLenBytes);
    return newLenBytes;
}
00780 
//! Locates str with leading and/or trailing pad characters removed, without
//! copying: *result is set to point into str.
//!
//! Implemented for the single-byte encoding <1, 1> and for UCS2 <2, 1>.
//! For UCS2 the pad character is matched as a byte pair whose order is
//! selected by LITTLEENDIAN.
//!
//! @param result receives a pointer to the first byte kept
//! @param str source string
//! @param strLenBytes length of str in bytes, must be >= 0 (even for UCS2)
//! @param trimLeft non-zero to remove leading pad characters
//! @param trimRight non-zero to remove trailing pad characters
//! @param trimchar the pad character
//! @return length in bytes of the trimmed string starting at *result
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrTrim(
    char const ** result,
    char const * const str,
    int strLenBytes,
    int trimLeft,
    int trimRight,
    int trimchar = ' ')
{
    char const * start = str;
    char const * end = str + strLenBytes;
    assert(strLenBytes >= 0);

    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // ASCII
            // If many pad characters are expected, consider using memrchr()
            if (trimLeft) {
                while (start != end && *start == trimchar) {
                    start++;
                }
            }
            if (trimRight) {
                // Walk end back over every trailing pad character.
                while (end != start && *(end - 1) == trimchar) {
                    end--;
                }
            }
        } else if (CodeUnitBytes == 2) {
            // UCS2
            assert(!(strLenBytes & 1));
            char byte1, byte2;
#ifdef LITTLEENDIAN
            byte2 = (trimchar >> 8) & 0xff;
            byte1 = trimchar & 0xff;
#else
            byte1 = (trimchar >> 8) & 0xff;
            byte2 = trimchar & 0xff;
#endif
            if (trimLeft) {
                while (start < end && *start == byte1 && *(start+1) == byte2) {
                    start += 2;
                }
            }
            if (trimRight) {
                // Always compare the full byte pair. Comparing a single
                // byte against trimchar would leave one pad character
                // behind whenever the first stored byte differs from
                // trimchar (e.g. big-endian ' ').
                while (end - start >= 2
                       && *(end - 2) == byte1 && *(end - 1) == byte2)
                {
                    end -= 2;
                }
            }
            assert(!((end - start) & 1));
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        // Note: Potentially UTF16 can be handled by UCS2 code
        throw std::logic_error("no UTF8/16/32");
    }

    *result = start;
    return static_cast<int>(end - start);
}
00861 
//! Parses a string as a signed 64-bit integer.
//!
//! Accepted form: optional leading pad characters, an optional '+' or '-'
//! immediately followed by one or more decimal digits, then optional
//! trailing pad characters. Only the single-byte encoding <1, 1> is
//! implemented.
//!
//! @param str string to parse
//! @param strLenBytes length of str in bytes
//! @param padChar character allowed before and after the number
//! @return the parsed value
//! @throw "22018" (char const *) if the string is not of the accepted form;
//!        this takes priority over the range error
//! @throw "22003" (char const *) if the value does not fit in int64_t
//! @throw std::logic_error for encodings that are not implemented
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int64_t
SqlStrCastToExact(
    char const * const str,
    int strLenBytes,
    int padChar = ' ')
{
    // The magnitude is accumulated unsigned so that neither the 19th digit
    // nor the value 2^63 (needed for the most negative int64_t) relies on
    // signed overflow.
    uint64_t magnitude = 0;
    bool negative = false;

    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // ASCII
            // comparison must be unsigned to work for > 128
            unsigned char const *ptr =
                reinterpret_cast<unsigned char const *>(str);
            unsigned char const *end =
                reinterpret_cast<unsigned char const *>(str + strLenBytes);

            // STATE: parse optional sign, consume leading white space
            while (ptr < end) {
                if (*ptr == '-') {
                    // move onto next state, do not allow whitespace
                    // after -, for example '- 4' is not allowed
                    negative = true;
                    ptr++;
                    break;
                } else if (*ptr == '+') {
                    // move onto next state, do not allow whitespace
                    // after +, for example '+ 4' is not allowed
                    ptr++;
                    break;
                } else if (*ptr == padChar) {
                    // consume leading whitespace
                    ptr++;
                } else if (*ptr >= '0' &&  *ptr <= '9') {
                    // found a number. don't advance, move onto next state
                    break;
                } else {
                    // unexpected character found
                    // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
                    // exception -- invalid character value for cast
                    throw "22018";
                }
            }

            if (ptr >= end) {
                // no number found
                // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
                // exception -- invalid character value for cast
                throw "22018";
            }

            // STATE: Parse numbers until padChar, end, or illegal char
            bool parsed = false;
            bool got_nonzero = false;
            bool overflow = false;
            int ndigits = 0;   // significant digits seen, capped at 20
            while (ptr < end) {
                if (*ptr >= '0' && *ptr <= '9') {
                    // number
                    unsigned const digit = *(ptr++) - '0';
                    if (digit != 0) {
                        got_nonzero = true;
                    }
                    // Only start counting digits after 1st nonzero digit
                    if (got_nonzero && ndigits <= 19) {
                        ndigits++;
                    }
                    if (ndigits <= 19) {
                        // Any 19-digit decimal number is below 2^64, so
                        // this cannot wrap.
                        magnitude = magnitude * 10 + digit;
                    } else {
                        // 20 or more significant digits never fit
                        overflow = true;
                    }
                    parsed = true;
                } else if (*ptr == padChar) {
                    // move onto next state, end of number
                    ptr++;
                    break;
                } else {
                    // illegal character
                    parsed = false;
                    break;
                }
            }

            // STATE: Parse padChar until end or illegal char
            while (ptr < end) {
                if (*(ptr++) != padChar) {
                    // unexpected character after end of number
                    parsed = false;
                    break;
                }
            }
            if (!parsed) {
                // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
                // exception -- invalid character value for cast
                throw "22018";
            }

            // Largest magnitude allowed: 2^63 - 1, or 2^63 when negative.
            uint64_t limit = static_cast<uint64_t>(
                std::numeric_limits<int64_t>::max());
            if (negative) {
                limit += 1;
            }

            // Throw overflow exception only if parse okay
            if (overflow || magnitude > limit) {
                // data exception -- numeric value out of range
                throw "22003";
            }
        } else if (CodeUnitBytes == 2) {
            // TODO: Add UCS2 here
            throw std::logic_error("no UCS2");
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        throw std::logic_error("no UTF8/16/32");
    }

    if (negative) {
        if (magnitude
            > static_cast<uint64_t>(std::numeric_limits<int64_t>::max()))
        {
            // magnitude is exactly 2^63, which cannot be negated as int64_t
            return std::numeric_limits<int64_t>::min();
        }
        return -static_cast<int64_t>(magnitude);
    }
    return static_cast<int64_t>(magnitude);
}
01004 
//! Returns the bound on an exact numeric of the given decimal precision.
//!
//! For precision < 19 the result is 10^precision - 1 (0 when precision is
//! 0 or negative), regardless of sign. For precision >= 19 the bound is the
//! limit of int64_t itself: max() for non-negative values and min() for
//! negative ones.
//!
//! The negative bound used to be written as -min(), which is signed
//! overflow (undefined behavior) because 2^63 is not representable in
//! int64_t. On two's-complement targets that expression evaluates to min(),
//! so min() is returned explicitly to keep the value while removing the
//! undefined behavior.
//! NOTE(review): callers that expected a positive magnitude for the
//! negative case never received one -- confirm they handle min().
//!
//! @param precision number of decimal digits
//! @param negative whether the bound is for a negative value
//! @return the bound as described above
inline int64_t SqlExactMax(int precision, bool negative)
{
    if (precision < 19) {
        int64_t rv = 1;
        for (int i = 0; i < precision; i++) {
            rv *= 10;
        }
        return rv - 1;
    }
    return negative
        ? std::numeric_limits<int64_t>::min()
        : std::numeric_limits<int64_t>::max();
}
01026 
// Casts a character string to an exact numeric with the given precision and
// scale.  Only the single-byte, one-code-unit-per-code-point (ASCII) case is
// implemented; every other template combination throws std::logic_error.
//
// Accepted text: optional leading padChar characters, an optional '+' or
// '-', digits with at most one '.', an optional 'E'/'e' exponent, and
// optional trailing padChar characters.  The text after 'E'/'e' is handed to
// the (str, length, padChar) overload of SqlStrCastToExact.
//
// str          start of the text (not required to be null terminated; the
//              code only reads strLenBytes bytes)
// strLenBytes  number of bytes to read from str
// precision    maximum number of significant digits kept in the mantissa;
//              must be > 0 (asserted).  Further digits are dropped, and the
//              first dropped digit decides whether the mantissa is rounded up
// scale        number of fractional digits the returned integer represents;
//              the parsed value is multiplied or divided (rounding half up on
//              the magnitude) by powers of ten to match it
// padChar      character treated as padding before and after the number
//
// Returns the parsed value as an integer carrying `scale` fractional digits,
// negated when the text had a leading '-'.
//
// Throws "22018" (invalid character value for cast) when no number is found
// or an unexpected character is present, and "22003" (numeric value out of
// range) on overflow or when leading significant digits would be lost.
01030 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
01031 int64_t
01032 SqlStrCastToExact(
01033     char const * const str,
01034     int strLenBytes,
01035     int precision,
01036     int scale,
01037     int padChar = ' ')
01038 {
01039     int64_t rv = 0;
01040     bool negative = false;
01041 
01042     assert(precision > 0);
01043     if (MaxCodeUnitsPerCodePoint == 1) {
01044         if (CodeUnitBytes == 1) {
01045             // ASCII
01046             // comparison must be unsigned to work for > 128
01047             unsigned char const *ptr =
01048                 reinterpret_cast<unsigned char const *>(str);
01049             unsigned char const *end =
01050                 reinterpret_cast<unsigned char const *>(str + strLenBytes);
01051 
01052             // STATE: parse optional sign, consume leading white space
01053             while (ptr < end) {
01054                 if (*ptr == '-') {
01055                     // move onto next state, do not allow whitespace
01056                     // after -, for example '- 4' is not allowed
01057                     negative = true;
01058                     ptr++;
01059                     break;
01060                 } else if (*ptr == '+') {
01061                     // move onto next state, do not allow whitespace
01062                     // after +, for example '+ 4' is not allowed
01063                     ptr++;
01064                     break;
01065                 } else if (*ptr == padChar) {
01066                     // consume leading whitespace
01067                     ptr++;
01068                 } else if (*ptr >= '0' &&  *ptr <= '9') {
01069                     // found a number. don't advance, move onto next state
01070                     break;
01071                 } else if (*ptr == '.') {
01072                     // found decimal point. don't advance, move onto next state
01073                     break;
01074                 } else {
01075                     // unexpected character found
01076                     // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
01077                     // exception -- invalid character value for cast
01078                     throw "22018";
01079                 }
01080             }
01081 
01082             if (ptr >= end) {
01083                 // no number found
01084                 // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
01085                 // exception -- invalid character value for cast
01086                 throw "22018";
01087             }
01088 
01089             // STATE: Parse numbers until padChar, end, or illegal char
                  // parsed           at least one digit was seen and nothing illegal
                  // decimal_parsed   a '.' has been consumed
                  // roundup          first digit dropped past `precision` was >= 5
                  // ignored          count of digits dropped past `precision`
                  // decimal_position value of parsed_digits when '.' was seen
                  // mantissa_digits  digits accumulated in mantissa, not counting
                  //                  leading zeros
                  // parsed_digits    digits seen, not counting leading zeros that
                  //                  precede the decimal point
01090             bool parsed = false;
01091             bool decimal_parsed = false;
01092             bool roundup = false;
01093             bool overflow = false;
01094             int ignored = 0;
01095             int decimal_position = 0;
01096             int mantissa_digits = 0, parsed_digits = 0;
01097             int digit;
01098             int64_t mantissa = 0;
01099             int64_t exponent = 0;
01100             while (ptr < end) {
01101                 if (*ptr >= '0' && *ptr <= '9') {
01102                     // number
01103                     digit = (*(ptr++) - '0');
01104 
01105                     // Only add to mantissa if precision not reached
01106                     if (mantissa_digits < precision) {
01107                         if (mantissa_digits < 18) {
01108                             mantissa = mantissa * 10 + digit;
01109                         } else if (mantissa_digits == 18) {
01110                             // Handle 19th digit overflow
                                  // NOTE(review): a result smaller than the old
                                  // mantissa is taken as the sign of overflow; this
                                  // depends on signed wrap-around, which the C++
                                  // standard does not guarantee.
01111                             int64_t tmp;
01112                             tmp = mantissa * 10 + digit;
01113                             if (tmp < mantissa) {
01114                                 if (negative) {
01115                                     if (-tmp
01116                                         == std::numeric_limits<int64_t>::min())
01117                                     {
01118                                         // okay
01119                                     } else {
01120                                         // data exception -- numeric
01121                                         // value out of range
01122                                         overflow = true;
01123                                     }
01124                                 } else {
01125                                     // data exception -- numeric value
01126                                     // out of range
01127                                     overflow = true;
01128                                 }
01129                             }
01130                             mantissa = tmp;
01131                         } else {
01132                             overflow = true;
01133                         }
01134 
                              // Leading zeros do not count as mantissa digits.
01135                         if (mantissa != 0) {
01136                             mantissa_digits++;
01137                         }
01138                     } else {
01139                         // Decide if ignored digits (after precision is lost)
01140                         // causes the final result to be rounded up or not
01141                         ignored++;
01142                         if (ignored == 1) {
01143                             roundup = (digit >= 5);
01144                         }
01145                     }
01146                     parsed = true;
01147                     if (decimal_parsed || mantissa != 0) {
01148                         parsed_digits++;
01149                     }
01150                 } else if (!decimal_parsed && (*ptr == '.')) {
01151                     // decimal point
01152                     ptr++;
01153                     decimal_parsed = true;
01154                     decimal_position = parsed_digits;
01155                 } else if ((*ptr == 'E') || (*ptr == 'e')) {
01156                     // parse exponent, move into next state
                          // The rest of the string (including any trailing
                          // padChar) is handed to the (str, length, padChar)
                          // overload; an 'E' with nothing usable after it marks
                          // the whole cast as invalid.
01157                     ptr++;
01158                     if (ptr < end) {
01159                         if (*ptr == '+' || *ptr == '-' ||
01160                             (*ptr >= '0' && *ptr <= '9')) {
01161                             exponent = SqlStrCastToExact
01162                                 <CodeUnitBytes, MaxCodeUnitsPerCodePoint>
01163                                 ((char const * const) ptr, end - ptr, padChar);
01164                         } else {
01165                             parsed = false;
01166                         }
01167                     } else {
01168                         parsed = false;
01169                     }
01170                     ptr = end;
01171                     break;
01172                 } else if (*ptr == padChar) {
01173                     // move onto next state, end of number
01174                     ptr++;
01175                     break;
01176                 } else {
01177                     // illegal character
01178                     parsed = false;
01179                     break;
01180                 }
01181             }
01182 
01183             // STATE: Parse padChar until end or illegal char
01184             while (ptr < end) {
01185                 if (*(ptr++) != padChar) {
01186                     // unexpected character after end of number
01187                     parsed = false;
01188                     break;
01189                 }
01190             }
01191             if (!parsed) {
01192                 // SQL99 Part 2 Section 6.22 General Rule 6.b.i data
01193                 // exception -- invalid character value for cast
01194                 throw "22018";
01195             }
01196 
01197             // Throw overflow exception only if parse okay
01198             if (overflow) {
01199                 // data exception -- numeric value out of range
01200                 throw "22003";
01201             }
01202 
01203             if (!decimal_parsed) {
01204                 decimal_position = parsed_digits;
01205             }
01206 
01207             if (roundup) {
01208                 // Check if digits will increase/overflow
01209                 if (mantissa == SqlExactMax(mantissa_digits, negative)) {
01210                     mantissa_digits++;
01211                 }
01212                 mantissa++;
01213             }
01214 
                  // Scale of the parsed mantissa: digits after the decimal
                  // point, less the digits that were dropped, shifted by the
                  // exponent.
01215             int parsed_scale =
01216                 parsed_digits - ignored - decimal_position - exponent;
01217 
01218             if (mantissa_digits - parsed_scale > precision - scale) {
01219                 // SQL2003 Part 2 Section 6.12 General Rule 8.a.ii
01220                 // data exception -- numeric value out of range
01221                 // (if leading significant digits are lost)
01222                 throw "22003";
01223             }
01224 
01225             rv = mantissa;
01226 
01227             if (scale > parsed_scale) {
                      // Target has more fractional digits: append zeros.
01228                 int64_t tmp;
01229                 for (int i = 0; i < scale - parsed_scale; i++) {
01230                     tmp = rv*10;
01231                     // Check for overflow
01232                     if (tmp < rv) {
01233                         // data exception -- numeric value out of range
01234                         throw "22003";
01235                     }
01236                     rv = tmp;
01237                 }
01238             } else if (scale < parsed_scale) {
                      // Target has fewer fractional digits: drop the excess
                      // digits, then round on the dropped part.
01239                 int adjust_scale = parsed_scale - scale;
01240                 for (int i = 0; i < adjust_scale; i++) {
01241                     rv = rv / 10;
01242                 }
01243 
01244                 // Do Rounding
01245                 int64_t factor = 1;
01246                 for (int i = 0; i < adjust_scale; i++) {
01247                     factor *= 10;
01248                 }
01249                 if (mantissa % factor >= factor / 2) {
01250                     // Check if digit will increase/overflow
01251                     if (rv
01252                         == SqlExactMax(
01253                             mantissa_digits - adjust_scale, negative))
01254                     {
01255                         mantissa_digits++;
01256                         if (mantissa_digits - parsed_scale
01257                             > precision - scale) {
01258                             // SQL2003 Part 2 Section 6.12 General Rule 8.a.ii
01259                             // data exception -- numeric value out of range
01260                             // (if leading significant digits are lost)
01261                             throw "22003";
01262                         }
01263                     }
01264                     rv++;
01265                 }
01266             }
01267 
01268         } else if (CodeUnitBytes == 2) {
01269             // TODO: Add UCS2 here
01270             throw std::logic_error("no UCS2");
01271         } else {
01272             throw std::logic_error("no such encoding");
01273         }
01274     } else {
01275         throw std::logic_error("no UTF8/16/32");
01276     }
01277 
      // The magnitude was built unsigned-style above; apply the sign last.
01278     if (negative) {
01279         return rv * -1;
01280     } else {
01281         return rv;
01282     }
01283 }
01284 
// Casts a character string to an approximate numeric (double).  Only the
// single-byte, one-code-unit-per-code-point (ASCII) case is implemented;
// every other template combination throws std::logic_error.
//
// Leading padChar characters are skipped, the remainder is copied into a
// null-terminated scratch buffer and parsed with strtod, and everything
// after the parsed number must be padChar.
//
// str          start of the text; strLenBytes bytes are read
// strLenBytes  number of bytes to read from str
// padChar      padding character allowed before and after the number.  A
//              byte matches either as a plain char or by its unsigned value,
//              so pad characters above 127 work whether or not char is
//              signed (the exact-numeric cast compares unsigned as well).
//
// Returns the value produced by strtod.
//
// Throws "22018" (invalid character value for cast) when strtod consumes
// nothing or a non-pad character follows the number, and "22003" (numeric
// value out of range) when the result lies outside +/- the largest finite
// double.
//
// NOTE(review): the scratch buffer lives on the stack and is sized by the
// input length (variable-length array, or _alloca on MSVC); callers are
// presumably expected to pass bounded strings -- confirm.
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
double
SqlStrCastToApprox(
    char const * const str,
    int strLenBytes,
    int padChar = ' ')
{
    double rv;
    if (MaxCodeUnitsPerCodePoint == 1) {
        if (CodeUnitBytes == 1) {
            // ASCII

            char const *ptr = str;
            char const *end = str + strLenBytes;
            char *endptr;

            // Skip past any leading whitespace. Allows string with
            // arbitrary amounts of leading whitespace.  The second
            // comparison lets pad characters > 127 match when char is
            // signed.
            while (ptr < end
                && (*ptr == padChar
                    || static_cast<unsigned char>(*ptr) == padChar))
            {
                ptr++;
            }

            // strtod needs a null-terminated string, so copy what is left.
            int max = end - ptr;
#ifdef __MSVC__
            char *tmp = (char *) _alloca(max + 1);
#else
            char tmp[max + 1];
#endif
            memcpy(tmp, ptr, max);
            tmp[max] = 0;
            rv = strtod(tmp, &endptr);

            if (endptr == tmp) {
                // SQL99 Part 2 Section 6.22 General Rule 7.b.i "22018"
                // data exception -- invalid character value for cast
                throw "22018";
            }

            // verify that trailing characters are all padChar
            ptr += endptr - tmp; // advance past parsed digits
            while (ptr < end) {
                if (*ptr != padChar
                    && static_cast<unsigned char>(*ptr) != padChar)
                {
                    // SQL99 Part 2 Section 6.22 General Rule 7.b.i "22018"
                    // data exception -- invalid character value for cast
                    throw "22018";
                }
                ptr++;
            }

            // Throw exception if overflow
            double dmax = std::numeric_limits<double>::max();
            if (rv > dmax || rv < -dmax) {
                // Overflow
                throw "22003";
            }

        } else if (CodeUnitBytes == 2) {
            // TODO: Add UCS2 here
            throw std::logic_error("no UCS2");
        } else {
            throw std::logic_error("no such encoding");
        }
    } else {
        throw std::logic_error("no UTF8/16/32");
    }

    return rv;
}
01359 
01368 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
01369 int
01370 SqlStrCastFromExact(
01371     char* dest,
01372     int destStorageBytes,
01373     int64_t src,
01374     bool fixed,  // e.g. char, else variable (varchar)
01375     int padchar = ' ')
01376 {
01377     int rv;
01378 
01379     if (MaxCodeUnitsPerCodePoint == 1) {
01380         if (CodeUnitBytes == 1) {
01381             // ASCII
01382 
01383             // TODO: Check performance of doing snprintf and a memcpy vs
01384             // TODO: a home-rolled version with % and /10, etc. Both
01385             // TOOD: require a copy, or some precomputation of string length.
01386             // TODO: Hard to say which would be faster w/o implementing both.
01387             // TODO: Note: can't always snprintf directly into dest, due to
01388             // TODO: null termination wasting a byte.
01389 
01390             // A previous implementation (retained below)
01391             // optimistically tried to snprintf into the output
01392             // buffer, and retried if it would have fit, save for the
01393             // null termination. The logic gets complicated in the
01394             // face of snprintf implementatins that return -1, where
01395             // such information is not possible.  Until an
01396             // optimization pass can be made, always snprintf into a
01397             // temporary buffer, and memcpy the result back if it
01398             // would fit.
01399 
01400             char buf[36];      // #%lld should always fit in 21 bytes.
01401 
01402             // TODO: Bug on mingw where int64_t is not handled correctly
01403             //       by snprintf %lld
01404             // Windows (MS dll) uses %I64d but that causes warning
01405             //       with gcc -Wall
01406             rv = snprintf(buf, 35, "%" FMT_INT64, src);
01407 
01408             // snprintf does not return null termination in length
01409             assert(rv >= 0 && rv <= 35);
01410             if (rv <= destStorageBytes) {
01411                 memcpy(dest, buf, rv);
01412             } else {
01413                 memcpy(dest, buf, destStorageBytes);
01414                 // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01415                 // length), "22001" data exception -- string data, right
01416                 // truncation
01417 
01418                 // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01419                 // length) "22001" data exception -- string data, right
01420                 // truncation
01421                 throw "22001";
01422             }
01423 
01424 
01425 #ifdef SECOND_ALTERNATIVE_IMPLEMENTATION_DOES_NOT_WORK_ON_CYGWIN
01426 
01427             // Older glibc returns -1 from snprintf. logic gets
01428             // complicated
01429 
01430             rv = snprintf(dest, destStorageBytes, "%" FMT_INT64, src);
01431             if (rv == destStorageBytes) {
01432                 // Would have fit, except for the null termination. Do
01433                 // over into a temporary buf, copy results back.
01434                 // Dreary performance in this case, which may be more common
01435                 // than random chance would predict. If this is
01436                 // not acceptable, see ALTERNATIVE_IMPLEMENTATION below
01437 
01438                 char buf[36];      // should always fit in 21 bytes.
01439                 rv = snprintf(buf, 35, "%" FMT_INT64, src);
01440                 assert(rv == destStorageBytes);
01441                 memcpy(dest, buf, destStorageBytes);
01442             } else if (rv > destStorageBytes) {
01443                 // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01444                 // length), "22001" data exception -- string data, right
01445                 // truncation
01446 
01447                 // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01448                 // length) "22001" data exception -- string data, right
01449                 // truncation
01450                 throw "22001";
01451             }
01452 #endif
01453 
01454 #ifdef ALTERNATIVE_IMPLEMENTATION_UNTESTED_UNPROFILED_AND_PERHAPS_UNHOLY
01455             // assume any int64_t will fit in 22 bytes:
01456             // int64_t has 19 digits, plus space for a - sign and null
01457             // termination is 21 bytes. Add one to round up to 22 bytes.
01458 
01459             // if storage >= 22 bytes, snprintf directly into dest
01460             // as a first-order optimization.
01461             if (destStorageBytes >= 22) {
01462                 rv = snprintf(dest, destStorageBytes, "%" FMT_INT64, src);
01463                 assert(rv <= destStorageBytes); // impossible?
01464                 if (rv > destStorageBytes) {
01465                     // Just in case 22 byte assumption isn't valid
01466 
01467                     // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01468                     // length) "22001" data exception -- string data, right
01469                     // truncation
01470 
01471                     // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01472                     // length), "22001" data exception -- string data, right
01473                     // truncation
01474                     throw "22001";
01475                 }
01476             } else {
01477                 // If src is somewhat less than max or min, it might
01478                 // be short enough to fit into destStorageBytes anyway.
01479                 // Write to buf to get around annoying null termination
01480                 // issue wasting one byte.
01481 
01482                 char buf[24];
01483                 rv = snprintf(buf, destStorageBytes, "%" FMT_INT64, src);
01484                 if (rv > destStorageBytes) {
01485                     // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01486                     // length) "22001" data exception -- string data, right
01487                     // truncation
01488 
01489                     // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01490                     // length) "22001" data exception -- string data, right
01491                     // truncation
01492                     throw "22001";
01493                 }
01494                 memcpy(dest, buf, rv);
01495             }
01496 #endif
01497             if (fixed) {
01498                 memset(dest + rv, padchar, destStorageBytes - rv);
01499                 rv = destStorageBytes;
01500             }
01501 
01502         } else if (CodeUnitBytes == 2) {
01503             // TODO: Add UCS2 here
01504             throw std::logic_error("no UCS2");
01505         } else {
01506             throw std::logic_error("no such encoding");
01507         }
01508     } else {
01509         throw std::logic_error("no UTF8/16/32");
01510     }
01511 
01512     return rv;
01513 }
01514 
01523 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
01524 int
01525 SqlStrCastFromExact(
01526     char* dest,
01527     int destStorageBytes,
01528     int64_t src,
01529     int precision,
01530     int scale,
01531     bool fixed,  // e.g. char, else variable (varchar)
01532     int padchar = ' ')
01533 {
01534     int rv;
01535 
01536     if (MaxCodeUnitsPerCodePoint == 1) {
01537         if (CodeUnitBytes == 1) {
01538             // ASCII
01539 
01540             // TODO: Check performance of current implementation against
01541             // TODO: a version with % and /10, etc.
01542 
01543             if (scale == 0) {
01544                 // Scale is 0, same as normal cast
01545                 rv = SqlStrCastFromExact
01546                     <CodeUnitBytes, MaxCodeUnitsPerCodePoint>
01547                     (dest, destStorageBytes, src, fixed, padchar);
01548             } else if (scale > 0) {
01549                 // Positive Scale
01550                 int ndigits, decimal, sign = 0;
01551                 char buf[36];      // #%lld should always fit in 21 bytes.
01552                 rv = snprintf(buf, 35, "%" FMT_INT64, abs(src));
01553                 // snprintf does not return null termination in length
01554                 assert(rv >= 0 && rv <= 35);
01555 
01556                 ndigits = rv;
01557                 if (src < 0) {
01558                     sign = 1;
01559                     rv++;
01560                 }
01561 
01562                 // Figure out where to add decimal point
01563                 decimal = ndigits - scale;
01564                 if (decimal < 0) {
01565                     // Need to pad with 0s
01566                     rv += (-decimal) + 1;
01567                 } else {
01568                     rv += 1;
01569                 }
01570 
01571                 // Check if there is enough space
01572                 if (rv > destStorageBytes) {
01573                     // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01574                     // length) "22001" data exception -- string data, right
01575                     // truncation
01576 
01577                     // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01578                     // length) "22001" data exception -- string data, right
01579                     // truncation
01580                     throw "22001";
01581                 }
01582 
01583                 // Copy into destination buffer, placing the '.' appropriately
01584                 if (sign) {
01585                     dest[0] = '-';
01586                 }
01587 
01588                 if (decimal < 0) {
01589                     int pad = -decimal;
01590                     dest[sign] = '.';
01591                     memset(dest + sign + 1, '0', pad);
01592                     memcpy(dest + sign + 1 + pad, buf, ndigits);
01593                 } else {
01594                     memcpy(dest + sign, buf, decimal);
01595                     dest[decimal + sign] = '.';
01596                     memcpy(dest + sign + 1 + decimal, buf + decimal, scale);
01597                 }
01598 
01599                 if (fixed) {
01600                     memset(dest + rv, padchar, destStorageBytes - rv);
01601                     rv = destStorageBytes;
01602                 }
01603 
01604             } else {
01605                 // Negative Scale
01606                 int nzeros = (src != 0) ? -scale : 0;
01607                 int len;
01608                 char buf[36];      // #%lld should always fit in 21 bytes.
01609                 rv = snprintf(buf, 35, "%" FMT_INT64, src);
01610                 // snprintf does not return null termination in length
01611                 assert(rv >= 0 && rv <= 35);
01612 
01613                 len = rv;
01614 
01615                 // Check if there is enough space
01616                 rv += nzeros;
01617                 if (rv > destStorageBytes) {
01618                     // SQL99 Part 2 Section 6.22 General Rule 8.a.iv (fixed
01619                     // length) "22001" data exception -- string data, right
01620                     // truncation
01621 
01622                     // SQL99 Part 2 Section 6.22 General Rule 9.a.iii (variable
01623                     // length) "22001" data exception -- string data, right
01624                     // truncation
01625                     throw "22001";
01626                 }
01627 
01628                 // Add zeros
01629                 memcpy(dest, buf, len);
01630                 memset(dest + len, '0', nzeros);
01631 
01632                 if (fixed) {
01633                     memset(dest + rv, padchar, destStorageBytes - rv);
01634                     rv = destStorageBytes;
01635                 }
01636             }
01637         } else if (CodeUnitBytes == 2) {
01638             // TODO: Add UCS2 here
01639             throw std::logic_error("no UCS2");
01640         } else {
01641             throw std::logic_error("no such encoding");
01642         }
01643     } else {
01644         throw std::logic_error("no UTF8/16/32");
01645     }
01646 
01647     return rv;
01648 }
01649 
// Casts an approximate numeric (double) to text in exponent notation.  Only
// the single-byte, one-code-unit-per-code-point (ASCII) case is implemented;
// every other template combination throws std::logic_error.
//
// Zero is written as "0E0".  Any other value is formatted with "%.*E",
// rounded in place to drop the last printed digit, and then written as
// mantissa, 'E', an optional '-', and the exponent, with trailing zeros
// removed from the mantissa and leading zeros (and any '+') removed from the
// exponent.  Values beyond +/- the largest finite double are written as
// "INF" / "-INF"; any other text without an 'E' at the expected position is
// copied unchanged.
//
// dest              output buffer; no null terminator is written
// destStorageBytes  capacity of dest in bytes
// src               value to format
// isFloat           selects 7 instead of 16 fractional digits for the
//                   initial "%.*E" formatting
// fixed             true for fixed-width (char) output: the text is padded on
//                   the right with padchar up to destStorageBytes
// padchar           padding character used when fixed is true
//
// Returns the number of bytes of dest in use: the text length, or
// destStorageBytes when fixed is true.
//
// Throws "22001" (string data, right truncation) when the text does not fit
// in destStorageBytes.
01670 template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
01671 int
01672 SqlStrCastFromApprox(
01673     char* dest,
01674     int destStorageBytes,
01675     double src,
01676     bool isFloat,
01677     bool fixed,  // e.g. char, else variable (varchar)
01678     int padchar = ' ')
01679 {
01680     int rv;
01681 
01682     if (MaxCodeUnitsPerCodePoint == 1) {
01683         if (CodeUnitBytes == 1) {
01684             // ASCII
01685             if (src == 0.0) {
01686                 // 6.22 General Rule 8, case b i 2 and
01687                 // 6.22 General Rule 9, case b i 2
01688                 if (destStorageBytes >= 3) {
01689                     memcpy(dest, "0E0", 3);
01690                     rv = 3;
01691                 } else {
01692                     // SQL99 Part 2 Section 6.22 General Rule 8.b.iii.4 (fixed
01693                     // length) "22001" data exception - string
01694 
01695                     // SQL99 Part 2 Section 6.22 General Rule 9.b.iii.3
01696                     // (variable length) "22001" data exception -- string data,
01697                     // right truncation
01698                     throw "22001";
01699                 }
01700             } else {
01701                 // Note: can't always snprintf directly into dest, due to
01702                 // null termination wasting a byte.
01703 
01708 
01709                 int max_precision = (isFloat) ? 7 : 16;
01710                 char buf[36];      // #.16E should always fit in 22 bytes.
01711                 rv = snprintf(buf, 35, "%.*E", max_precision, src);
01712 
01713                 // snprintf does not include null termination in length
01714                 assert(rv >= 0 && rv <= 35);
01715 
                      // Replace whatever snprintf produced for infinities
                      // with a fixed spelling.
01716                 if (src > std::numeric_limits<double>::max()) {
01717                     strcpy(buf, "INF");
01718                     rv = 3;
01719                 } else if (src < -std::numeric_limits<double>::max()) {
01720                     strcpy(buf, "-INF");
01721                     rv = 4;
01722                 }
01723 
01724                 // Trim trailing zeros from mantissa, and initial zeros
01725                 // from exponent
                      // last_nonzero starts at the index of the leading
                      // mantissa digit (after an optional '-').  eindex is
                      // where the 'E' is expected: leading digit, '.', then
                      // max_precision fraction digits.
01726                 int buflen = rv;
01727                 int last_nonzero = (src < 0) ? 1 : 0;
01728                 int eindex = last_nonzero + max_precision + 2;
01729                 int eneg = 0;
01730                 int explen = 1;
01731 
01732                 if ((buflen > eindex) && buf[eindex] == 'E') {
01733                     // Normal number with exponent
01734 
01735                     // Round up if needed
                          // The last printed digit is zeroed and, when it was
                          // >= 5, a carry is propagated leftwards through the
                          // mantissa, skipping the '.'.
01736                     if ((buf[eindex - 1] >= '5') && (buf[eindex - 1] <= '9')) {
01737                         buf[eindex - 1] = '0';
01738                         for (int i = eindex - 2; i >= last_nonzero; i--) {
01739                             if (buf[i] == '9') {
01740                                 buf[i] = '0';
01741                             } else if (buf[i] != '.') {
01742                                 buf[i]++;
01743                                 break;
01744                             }
01745                         }
01746 
01747                         // See if initial digit overflowed (very unlikely)
                              // The carry ran off the front: make the leading
                              // digit 1, shift the fraction right by one and
                              // bump the exponent.
01748                         if (buf[last_nonzero] == '0') {
01749                             buf[last_nonzero] = '1';
01750                             for (int i = eindex - 1;
01751                                  i > last_nonzero + 2;
01752                                  i--)
01753                             {
01754                                 buf[i] = buf[i - 1];
01755                             }
01756                             buf[last_nonzero + 2] = '0';
01757 
01758                             // increment exponent
01759                             int exp;
01760                             sscanf(buf + eindex + 1, "%d", &exp);
01761                             sprintf(buf + eindex + 1, "%d", exp + 1);
01762                             buflen = strlen(buf);
01763                         }
01764                     }
01765 
01766                     // Ignore last digit
01767                     // only need 16 digits in total, 15 digits after '.'
                          // Find the last mantissa digit worth keeping.
01768                     for (int i = eindex - 2; i >= 0; i--) {
01769                         if ((buf[i] >= '1') && (buf[i] <= '9')) {
01770                             last_nonzero = i;
01771                             break;
01772                         }
01773                     }
                          // explen counts exponent characters from its first
                          // non-zero digit to the end; it stays 1 (the final
                          // character) when the exponent is all zeros.
01774                     eneg = (buf[eindex + 1] == '-') ? 1 : 0;
01775                     for (int i = eindex + 1; i < buflen; i++) {
01776                         if ((buf[i] >= '1') && (buf[i] <= '9')) {
01777                             explen = buflen - i;
01778                             break;
01779                         }
01780                     }
01781 
01782                     // final length = mantissa + 'E' + optional '-' + explen
01783                     rv = last_nonzero + 1 + 1 + eneg + explen;
01784                 } else {
01785                     // Special number (INF, -INF, NaN)
01786                     rv = buflen;
01787                 }
01788 
01789                 if (rv <= destStorageBytes) {
01790                     if (rv == buflen) {
01791                         // Copy all
01792                         memcpy(dest, buf, rv);
01793                     } else {
01794                         // Don't copy trailing zeros of mantissa
01795                         memcpy(dest, buf, last_nonzero + 1);
01796                         rv = last_nonzero + 1;
01797                         dest[rv++] = 'E';
01798                         if (eneg) {
01799                             dest[rv++] = '-';
01800                         }
01801                         // Copy exponent
01802                         memcpy(dest + rv, buf + (buflen - explen), explen);
01803                         rv += explen;
01804                     }
01805                 } else {
01806                     // SQL99 Part 2 Section 6.22 General Rule 8.b.iii.4 (fixed
01807                     // length) "22001" data exception - string
01808 
01809                     // SQL99 Part 2 Section 6.22 General Rule 9.b.iii.3
01810                     // (variable length) "22001" data exception -- string data,
01811                     // right truncation
01812                     throw "22001";
01813                 }
01814             }
01815 
                  // Fixed width: fill the remainder with the pad character.
01816             if (fixed) {
01817                 memset(dest + rv, padchar, destStorageBytes - rv);
01818                 rv = destStorageBytes;
01819             }
01820 
01821         } else if (CodeUnitBytes == 2) {
01822             // TODO: Add UCS2 here
01823             throw std::logic_error("no UCS2");
01824         } else {
01825             throw std::logic_error("no such encoding");
01826         }
01827     } else {
01828         throw std::logic_error("no UTF8/16/32");
01829     }
01830 
01831     return rv;
01832 }
01833 
01834 
//! Casts a character string to a variable-length (VARCHAR) string.
//!
//! Only the instantiation with CodeUnitBytes == 1 and
//! MaxCodeUnitsPerCodePoint == 1 (ASCII) is implemented.
//!
//! If src fits in dest, src is copied and any unused tail of dest is filled
//! with padchar; the returned length still counts only the copied bytes.
//! If src is longer than dest, only the first destStorageBytes bytes are
//! copied. When at least one discarded byte differs from padchar and
//! rightTruncWarning is non-NULL, *rightTruncWarning is set to 1 (SQL99
//! Part 2 Section 6.22 General Rule 8.c.ii treats this as a warning, so the
//! caller decides what to do with it). The flag is only ever set here,
//! never reset to 0.
//!
//! @param dest destination buffer, writable for destStorageBytes bytes
//! @param destStorageBytes capacity of dest in bytes
//! @param src source characters (need not be NUL terminated)
//! @param srcLenBytes number of bytes to read from src
//! @param rightTruncWarning optional out-flag, set to 1 on lossy truncation
//! @param padchar byte treated as insignificant padding
//! @return number of result bytes in dest: srcLenBytes when src fits,
//!         otherwise destStorageBytes
//! @throws std::logic_error for any non-ASCII instantiation
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrCastToVarChar(
    char *dest,
    int destStorageBytes,
    char *src,
    int srcLenBytes,
    int *rightTruncWarning = NULL,
    int padchar = ' ')
{
    if (MaxCodeUnitsPerCodePoint != 1) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        // TODO: Add UCS2 here
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    // ASCII
    if (srcLenBytes <= destStorageBytes) {
        memcpy(dest, src, srcLenBytes);
        if (srcLenBytes < destStorageBytes) {
            // Blank out the unused storage; it is deliberately not counted
            // in the returned length of a variable-length result.
            memset(
                dest + srcLenBytes,
                padchar,
                destStorageBytes - srcLenBytes);
        }
        return srcLenBytes;
    }

    // Source is too long: keep what fits.
    memcpy(dest, src, destStorageBytes);

    // The discarded tail only matters when the caller asked for the warning.
    if (rightTruncWarning != NULL) {
        char const * const end = src + srcLenBytes;
        for (char const *trunc = src + destStorageBytes;
             trunc != end;
             ++trunc)
        {
            if (*trunc != padchar) {
                // One significant byte is enough to decide the warning,
                // so there is no need to look at the rest.
                *rightTruncWarning = 1;
                break;
            }
        }
    }
    return destStorageBytes;
}
01900 
01901 
//! Casts a character string to a fixed-length (CHAR) string.
//!
//! Only the instantiation with CodeUnitBytes == 1 and
//! MaxCodeUnitsPerCodePoint == 1 (ASCII) is implemented.
//!
//! The result always occupies the whole destination: a shorter src is
//! copied and right-padded with padchar, a longer src is cut off after
//! destStorageBytes bytes. When at least one discarded byte differs from
//! padchar and rightTruncWarning is non-NULL, *rightTruncWarning is set to 1
//! (SQL99 Part 2 Section 6.22 General Rule 9.c.ii treats this as a warning,
//! so the caller decides what to do with it). The flag is only ever set
//! here, never reset to 0.
//!
//! @param dest destination buffer, writable for destStorageBytes bytes
//! @param destStorageBytes capacity of dest in bytes
//! @param src source characters (need not be NUL terminated)
//! @param srcLenBytes number of bytes to read from src
//! @param rightTruncWarning optional out-flag, set to 1 on lossy truncation
//! @param padchar byte used for padding and treated as insignificant
//! @return destStorageBytes
//! @throws std::logic_error for any non-ASCII instantiation
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrCastToChar(
    char *dest,
    int destStorageBytes,
    char *src,
    int srcLenBytes,
    int *rightTruncWarning = NULL,
    int padchar = ' ')
{
    if (MaxCodeUnitsPerCodePoint != 1) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        // TODO: Add UCS2 here
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    // ASCII
    if (srcLenBytes <= destStorageBytes) {
        // Everything fits; fill whatever storage is left over.
        memcpy(dest, src, srcLenBytes);
        if (srcLenBytes < destStorageBytes) {
            memset(
                dest + srcLenBytes,
                padchar,
                destStorageBytes - srcLenBytes);
        }
    } else {
        // Source is too long: keep what fits.
        memcpy(dest, src, destStorageBytes);

        // The discarded tail only matters when the caller asked for the
        // warning.
        if (rightTruncWarning != NULL) {
            char const * const end = src + srcLenBytes;
            for (char const *trunc = src + destStorageBytes;
                 trunc != end;
                 ++trunc)
            {
                if (*trunc != padchar) {
                    // One significant byte is enough to decide the
                    // warning, so there is no need to look at the rest.
                    *rightTruncWarning = 1;
                    break;
                }
            }
        }
    }

    // A fixed-length result always fills its storage, whether it was
    // padded, an exact fit, or truncated.
    return destStorageBytes;
}
01966 
01967 
//! Casts a character string to a boolean.
//!
//! Only the instantiation with CodeUnitBytes == 1 and
//! MaxCodeUnitsPerCodePoint == 1 (ASCII) is implemented.
//!
//! Accepts the words TRUE and FALSE in any letter case, optionally
//! surrounded on either side by padChar characters. Anything else is
//! rejected.
//!
//! @param str characters to parse (need not be NUL terminated)
//! @param strLenBytes number of bytes to read from str
//! @param padChar byte that may surround the keyword
//! @return true for TRUE, false for FALSE
//! @throws "22018" (as a string literal) when the text is not a valid
//!         boolean: SQL2003 Part 2 Section 6.12 General Rule 20.a.ii,
//!         data exception -- invalid character value for cast
//! @throws std::logic_error for any non-ASCII instantiation
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
bool
SqlStrCastToBoolean(
    char const * const str,
    int strLenBytes,
    int padChar = ' ')
{
    if (MaxCodeUnitsPerCodePoint != 1) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        // TODO: Add UCS2 here
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    // ASCII
    char const * const limit = str + strLenBytes;
    char const *cursor = str;

    // Leading pad characters carry no meaning.
    for (; cursor < limit && *cursor == padChar; ++cursor) {
    }

    // The keyword must come next; the length is checked first so the
    // comparison never reads past the end of the input.
    bool result;
    if ((limit - cursor) >= 4 && strncasecmp(cursor, "TRUE", 4) == 0) {
        result = true;
        cursor += 4;
    } else if ((limit - cursor) >= 5 && strncasecmp(cursor, "FALSE", 5) == 0) {
        result = false;
        cursor += 5;
    } else {
        // SQL2003 Part 2 Section 6.12 General Rule 20.a.ii "22018"
        // data exception -- invalid character value for cast
        throw "22018";
    }

    // Only padding may follow the keyword.
    for (; cursor < limit; ++cursor) {
        if (*cursor != padChar) {
            // SQL2003 Part 2 Section 6.12 General Rule 20.a.ii "22018"
            // data exception -- invalid character value for cast
            throw "22018";
        }
    }

    return result;
}
02028 
//! Casts a boolean to its character representation, "TRUE" or "FALSE".
//!
//! Only the instantiation with CodeUnitBytes == 1 and
//! MaxCodeUnitsPerCodePoint == 1 (ASCII) is implemented.
//!
//! @param dest destination buffer, writable for destStorageBytes bytes
//! @param destStorageBytes capacity of dest in bytes
//! @param src value to convert
//! @param fixed true for a fixed-length (char) result, which is right-padded
//!        with padchar to fill dest; false for a variable-length (varchar)
//!        result
//! @param padchar byte used for padding a fixed-length result
//! @return number of result bytes in dest: destStorageBytes when fixed,
//!         otherwise the length of the keyword (4 or 5)
//! @throws "22018" (as a string literal) when dest is too small for the
//!         keyword: SQL2003 Part 2 Section 6.12 General Rule 10.e.iii /
//!         11.e.iii, data exception -- invalid character value for cast
//! @throws std::logic_error for any non-ASCII instantiation
template <int CodeUnitBytes, int MaxCodeUnitsPerCodePoint>
int
SqlStrCastFromBoolean(
    char* dest,
    int destStorageBytes,
    bool src,
    bool fixed,  // e.g. char, else variable (varchar)
    int padchar = ' ')
{
    if (MaxCodeUnitsPerCodePoint != 1) {
        throw std::logic_error("no UTF8/16/32");
    }
    if (CodeUnitBytes == 2) {
        // TODO: Add UCS2 here
        throw std::logic_error("no UCS2");
    }
    if (CodeUnitBytes != 1) {
        throw std::logic_error("no such encoding");
    }

    // ASCII

    // SQL2003 6.12 General Rule 10, case e i,ii  and
    // SQL2003 6.12 General Rule 11, case e i,ii
    char const * const keyword = src ? "TRUE" : "FALSE";
    int const keywordLen = src ? 4 : 5;

    if (destStorageBytes < keywordLen) {
        // SQL2003 Part 2 Section 6.12 General Rule
        // 10.e.iii (fixed length) and 11.e.iii (variable length)
        // "22018" data exception -- invalid character value for cast
        throw "22018";
    }

    memcpy(dest, keyword, keywordLen);
    if (!fixed) {
        return keywordLen;
    }

    // Fixed-length results always occupy the whole destination.
    memset(dest + keywordLen, padchar, destStorageBytes - keywordLen);
    return destStorageBytes;
}
02082 
02083 
02084 FENNEL_END_NAMESPACE
02085 
02086 #endif
02087 
02088 // End SqlString.h
02089 

Generated on Mon Jun 22 04:00:17 2009 for Fennel by  doxygen 1.5.1