LhxHashGenerator.cpp

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/hashexe/LhxHashGenerator.cpp#2 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2006-2009 The Eigenbase Project
00005 // Copyright (C) 2009-2009 SQLstream, Inc.
00006 // Copyright (C) 2006-2009 LucidEra, Inc.
00007 //
00008 // This program is free software; you can redistribute it and/or modify it
00009 // under the terms of the GNU General Public License as published by the Free
00010 // Software Foundation; either version 2 of the License, or (at your option)
00011 // any later version approved by The Eigenbase Project.
00012 //
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public License for more details.
00017 //
00018 // You should have received a copy of the GNU General Public License
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 */
00022 
00023 #include "fennel/common/CommonPreamble.h"
00024 #include "fennel/common/FennelExcn.h"
00025 #include "fennel/hashexe/LhxHashGenerator.h"
00026 #include <sstream>
00027 
00028 using namespace std;
00029 
00030 FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/hashexe/LhxHashGenerator.cpp#2 $");
00031 
00032 static uint8_t LhxHashGeneratorMagicTable[256] =
00033 {
00034     1,    87,   49,   12,   176,  178,  102,  166,  121,  193,  6,    84,
00035     249,  230,  44,   163,  14,   197,  213,  181,  161,  85,   218,  80,
00036     64,   239,  24,   226,  236,  142,  38,   200,  110,  177,  104,  103,
00037     141,  253,  255,  50,   77,   101,  81,   18,   45,   96,   31,   222,
00038     25,   107,  190,  70,   86,   237,  240,  34,   72,   242,  20,   226,
00039     236,  142,  38,   235,  97,   234,  57,   22,   60,   250,  82,   175,
00040     208,  5,    127,  199,  111,  62,   135,  248,  174,  169,  211,  58,
00041     66,   154,  106,  195,  245,  171,  17,   187,  182,  179,  0,    243,
00042     132,  56,   148,  75,   128,  133,  158,  100,  130,  126,  91,   13,
00043     153,  246,  216,  219,  119,  68,   223,  78,   83,   88,   201,  99,
00044     122,  11,   92,   32,   136,  114,  52,   10,   138,  30,   48,   183,
00045     156,  35,   61,   26,   143,  74,   251,  94,   129,  162,  63,   152,
00046     170,  7,    115,  167,  241,  206,  3,    150,  55,   59,   151,  220,
00047     90,   53,   23,   131,  125,  173,  15,   238,  79,   95,   89,   16,
00048     105,  137,  225,  224,  217,  160,  37,   123,  118,  73,   2,    157,
00049     46,   116,  9,    145,  134,  228,  207,  212,  202,  215,  69,   229,
00050     27,   188,  67,   124,  168,  252,  42,   4,    29,   108,  21,   247,
00051     19,   205,  39,   203,  233,  40,   186,  147,  198,  192,  155,  33,
00052     164,  191,  98,   204,  165,  180,  117,  76,   140,  36,   210,  172,
00053     41,   54,   159,  8,    185,  232,  113,  196,  231,  47,   146,  120,
00054     51,   65,   28,   144,  254,  221,  93,   189,  194,  139,  112,  43,
00055     71,   109,  184,  209
00056 };
00057 
00058 void LhxHashGenerator::init(uint levelInit)
00059 {
00060     /*
00061      * The seed computation can generate different seeds for only levels 0 - 63
00062      * Level 64 will have the same seed as level 0. So if recursive
00063      * partitioning goes to 64 level, no further partitioning is possible.
00064      */
00065     if (levelInit > 63) {
00066         ostringstream errMsg;
00067         errMsg << " Hash recursion level can not be deeper than 63";
00068         throw FennelExcn(errMsg.str());
00069     }
00070 
00071     level = levelInit;
00072     magicTable = LhxHashGeneratorMagicTable;
00073 
00074     uint base = level * 4;
00075     hashValueSeed
00076         = (uint8_t(base) << 24)
00077         | (uint8_t(base + 1) << 16)
00078         | (uint8_t(base + 2) << 8)
00079         | (uint8_t(base + 3));
00080 }
00081 
00082 // REVIEW jvs 25-Aug-2006: Awww, the fancy bit-twiddling from Broadbase is
00083 // gone.  Boris L. will be so disappointed.  I guess it will have to be
00084 // someone's science fair project to see if any of it was worthwhile.
00085 
00086 void LhxHashGenerator::hashOneBuffer(
00087     uint &hashValue,
00088     PConstBuffer pBuf,
00089     uint bufSize)
00090 {
00091     uint numValueBytes = sizeof(uint);
00092     uint8_t byteForNull = 0xff;
00093 
00094     if (pBuf == NULL) {
00095         bufSize = 1;
00096         pBuf = &byteForNull;
00097     }
00098 
00099     for (int count = 0; count < bufSize; count ++) {
00100         PBuffer pByte = (PBuffer) &hashValue;
00101 
00102         for (int i = 0; i < numValueBytes; i++) {
00103             *pByte = magicTable[(*pByte) ^ (*pBuf)];
00104             pByte ++;
00105         }
00106 
00107         pBuf ++;
00108     }
00109 }
00110 
00111 void LhxHashGenerator::hashOneColumn(
00112     uint &hashValue,
00113     TupleDatum const &inputCol,
00114     LhxHashTrim isVarChar)
00115 {
00116     uint trimmedLength = inputCol.cbData;
00117     PConstBuffer pData = inputCol.pData;
00118 
00119     if (pData) {
00120         /*
00121          * Only hash to the trimmed value.
00122          */
00123         if (isVarChar == HASH_TRIM_VARCHAR) {
00124             PConstBuffer pChar = pData + trimmedLength - 1;
00125             while ((pChar >= pData) && (*pChar == ' ')) {
00126                 --pChar;
00127             }
00128             trimmedLength = pChar - pData + 1;
00129         } else if (isVarChar == HASH_TRIM_UNICODE_VARCHAR) {
00130             PConstBuffer pChar = pData + trimmedLength - 2;
00131             while ((pChar >= pData)
00132                 && (*reinterpret_cast<uint16_t const *>(pChar) == ' '))
00133             {
00134                 pChar -= 2;
00135             }
00136             trimmedLength = pChar - pData + 2;
00137         }
00138     }
00139 
00140     // REVIEW jvs 25-Aug-2006:  Since the call below uses
00141     // sizeof(TupleStorageByteLength), shouldn't trimmedLength
00142     // be declared to match?
00143 
00144     /*
00145      * First hash the length
00146      * However, ignore length field if pData is NULL.
00147      */
00148     if (pData) {
00149         hashOneBuffer(
00150             hashValue,
00151             (PConstBuffer) &(trimmedLength),
00152             sizeof(TupleStorageByteLength));
00153     }
00154 
00155     /*
00156      * then hash the data buffer
00157      */
00158     hashOneBuffer(
00159         hashValue,
00160         pData,
00161         trimmedLength);
00162 }
00163 
00164 uint LhxHashGenerator::hash(
00165     TupleData const &inputTuple,
00166     TupleProjection const &keyProjection,
00167     vector<LhxHashTrim> const &isKeyColVarChar)
00168 {
00169     uint keyLength = keyProjection.size();
00170 
00171     /*
00172      * get initial hash value of 4 bytes.
00173      */
00174     uint hashValue = hashValueSeed;
00175 
00176     for (int i = 0; i < keyLength; i++) {
00177         TupleDatum const &col = inputTuple[keyProjection[i]];
00178         hashOneColumn(hashValue, col, isKeyColVarChar[i]);
00179     }
00180 
00181     /*
00182      * Note: if key size == 0 then the hash value is the same as the seed value.
00183      * This is the case for single group aggregate.
00184      */
00185     return hashValue;
00186 }
00187 
00188 FENNEL_END_CPPFILE("$Id: //open/dev/fennel/hashexe/LhxHashGenerator.cpp#2 $");
00189 
00190 // End LhxHashGenerator.cpp

Generated on Mon Jun 22 04:00:19 2009 for Fennel by  doxygen 1.5.1