00001 /* 00002 // $Id: //open/dev/fennel/hashexe/LhxAggExecStream.h#2 $ 00003 // Fennel is a library of data storage and processing components. 00004 // Copyright (C) 2006-2009 The Eigenbase Project 00005 // Copyright (C) 2009-2009 SQLstream, Inc. 00006 // Copyright (C) 2006-2009 LucidEra, Inc. 00007 // 00008 // This program is free software; you can redistribute it and/or modify it 00009 // under the terms of the GNU General Public License as published by the Free 00010 // Software Foundation; either version 2 of the License, or (at your option) 00011 // any later version approved by The Eigenbase Project. 00012 // 00013 // This program is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 // GNU General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU General Public License 00019 // along with this program; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 */ 00022 00023 #ifndef Fennel_LhxAggExecStream_Included 00024 #define Fennel_LhxAggExecStream_Included 00025 00026 #include "fennel/exec/ConduitExecStream.h" 00027 #include "fennel/exec/SortedAggExecStream.h" 00028 #include "fennel/hashexe/LhxHashBase.h" 00029 #include "fennel/hashexe/LhxHashTable.h" 00030 #include "fennel/hashexe/LhxPartition.h" 00031 00032 FENNEL_BEGIN_NAMESPACE 00033 00034 // REVIEW jvs 25-Aug-2006: Seems like a common base AggExecStreamParams is in 00035 // order (with SortedAggExecStreamParams left empty for now) 00039 struct LhxAggExecStreamParams : public SortedAggExecStreamParams 00040 { 00044 SharedSegment pTempSegment; 00045 00049 bool enableSubPartStat; 00050 00055 uint forcePartitionLevel; 00056 00057 // REVIEW jvs 25-Aug-2006: When using Javadoc/doxygen comments, 00058 // it's important to remember that each comment will show up 00059 // separately in the generated class documentation. Below, 00060 // the first comment says "Initial stats ...", which 00061 // really applies to both comments. Instead, put a summary 00062 // at the class-level comment, like "The fields cndGroupByKeys and numRows 00063 // are provided by the optimizer to help with estimating 00064 // resource allocation requirements" and then on the field-level 00065 // comments, just say what they are, e.g. "Estimate for number of rows from 00066 // the build input, etc." (No need to repeat the field name inside 00067 // of the field-level comments.) 00074 RecordNum cndGroupByKeys; 00075 00079 RecordNum numRows; 00080 }; 00081 00090 class FENNEL_HASHEXE_EXPORT LhxAggExecStream 00091 : public ConduitExecStream 00092 { 00093 // REVIEW jvs 26-Aug-2006: Fennel convention for enum names is 00094 // all uppercase with underscores 00095 00096 enum LhxAggState { 00097 ForcePartitionBuild, Build, Produce, ProducePending, 00098 Partition, CreateChildPlan, GetNextPlan, Done 00099 }; 00100 00104 TupleData inputTuple; 00105 00109 TupleData outputTuple; 00110 00111 // REVIEW jvs 25-Aug-2006: This member is only accessed within 00112 // one method (execute). Wouldn't it be easier to make it a local 00113 // variable there so it doesn't have to be reset? 00117 uint numTuplesProduced; 00118 00122 LhxHashInfo hashInfo; 00123 00127 LhxHashTable hashTable; 00128 LhxHashTableReader hashTableReader; 00129 00133 BlockNum numBlocksHashTable; 00134 00139 BlockNum numMiscCacheBlocks; 00140 00141 // REVIEW jvs 25-Aug-2006: Next three fields need comments, maybe 00142 // a reference to somewhere else explaining the plan concept. Is 00143 // it true that isTopPlan can be derived from (curPlan == rootPlan.get())? 00144 00145 /* 00146 * Plan 00147 */ 00148 bool isTopPlan; 00149 SharedLhxPlan rootPlan; 00150 00151 // REVIEW jvs 25-Aug-2006: If there's a valid reason not to declare this as 00152 // a SharedLhxPlan (like performance, which seems justified), then that 00153 // reason should be explained, since in general mixing shared and 00154 // non-shared pointers can be error-prone. 00155 LhxPlan *curPlan; 00156 00157 // REVIEW jvs 25-Aug-2006: This will always be 0, right? In that case, use 00158 // a static const to make it obvious, and assert accordingly in ::prepare. 00159 // And then use BUILD_INPUT_INDEX naming convention. 00160 00164 uint buildInputIndex; 00165 00169 LhxPartitionInfo partInfo; 00170 00174 SharedLhxPartition buildPart; 00175 00179 LhxPartitionReader buildReader; 00180 00184 bool enableSubPartStat; 00185 00190 uint forcePartitionLevel; 00191 00195 LhxAggState aggState; 00196 00197 // REVIEW jvs 25-Aug-2006: This seems fairly useless in LhxAggExecStream; 00198 // it only ever gets set to Produce. I think it's vestigial from 00199 // LhxJoinExecStream. 00203 LhxAggState nextState; 00204 00205 00206 // REVIEW jvs 25-Aug-2006: This is so temporary that it is never 00207 // even referenced anywhere? 00208 /* 00209 * Some temporary variables. 00210 */ 00211 uint groupByKeyCount; 00212 00213 // REVIEW jvs 25-Aug-2006: Next two fields need comments, maybe 00214 // a reference to somewhere else explaining the concept of partial 00215 // aggregation. 00216 00217 AggComputerList aggComputers; 00218 AggComputerList partialAggComputers; 00219 00220 // implement ExecStream 00221 virtual void closeImpl(); 00222 00223 /* 00224 * Set up hashInfo from exec stream parameters. 00225 */ 00226 void setHashInfo(LhxAggExecStreamParams const ¶ms); 00227 00228 /* 00229 * Set up the aggregate computers and partial aggregate computers used by 00230 * the hash table. 00231 */ 00232 void setAggComputers( 00233 LhxHashInfo &hashInfo, 00234 AggInvocationList const &aggInvocations); 00235 00236 public: 00237 // implement ExecStream 00238 virtual void prepare(LhxAggExecStreamParams const ¶ms); 00239 virtual void open(bool restart); 00240 virtual ExecStreamResult execute(ExecStreamQuantum const &quantum); 00241 00242 virtual void getResourceRequirements( 00243 ExecStreamResourceQuantity &minQuantity, 00244 ExecStreamResourceQuantity &optQuantity, 00245 ExecStreamResourceSettingType &optType); 00246 00247 virtual void setResourceAllocation( 00248 ExecStreamResourceQuantity &quantity); 00249 00250 }; 00251 00252 FENNEL_END_NAMESPACE 00253 00254 #endif 00255 00256 // End LhxAggExecStream.h