LhxAggExecStream.h

Go to the documentation of this file.
00001 /*
00002 // $Id: //open/dev/fennel/hashexe/LhxAggExecStream.h#2 $
00003 // Fennel is a library of data storage and processing components.
00004 // Copyright (C) 2006-2009 The Eigenbase Project
00005 // Copyright (C) 2009-2009 SQLstream, Inc.
00006 // Copyright (C) 2006-2009 LucidEra, Inc.
00007 //
00008 // This program is free software; you can redistribute it and/or modify it
00009 // under the terms of the GNU General Public License as published by the Free
00010 // Software Foundation; either version 2 of the License, or (at your option)
00011 // any later version approved by The Eigenbase Project.
00012 //
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public License for more details.
00017 //
00018 // You should have received a copy of the GNU General Public License
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021 */
00022 
00023 #ifndef Fennel_LhxAggExecStream_Included
00024 #define Fennel_LhxAggExecStream_Included
00025 
00026 #include "fennel/exec/ConduitExecStream.h"
00027 #include "fennel/exec/SortedAggExecStream.h"
00028 #include "fennel/hashexe/LhxHashBase.h"
00029 #include "fennel/hashexe/LhxHashTable.h"
00030 #include "fennel/hashexe/LhxPartition.h"
00031 
00032 FENNEL_BEGIN_NAMESPACE
00033 
00034 // REVIEW jvs 25-Aug-2006: Seems like a common base AggExecStreamParams is in
00035 // order (with SortedAggExecStreamParams left empty for now)
// Parameter bundle accepted by LhxAggExecStream::prepare() and
// LhxAggExecStream::setHashInfo(); it extends SortedAggExecStreamParams with
// the fields declared below.
// NOTE(review): the per-field Doxygen comments are missing from this listing
// (the embedded line numbers skip where they were), so the field notes below
// are inferred from names and from the REVIEW comments -- confirm them
// against the original header before relying on them.
00039 struct LhxAggExecStreamParams : public SortedAggExecStreamParams
00040 {
          // Presumably the segment used for temporary storage by the stream
          // -- TODO confirm.
00044     SharedSegment pTempSegment;
00045 
          // Presumably enables collection of sub-partition statistics
          // -- TODO confirm.  LhxAggExecStream has a member of the same name.
00049     bool enableSubPartStat;
00050 
          // Presumably a partition level at which partitioning is forced
          // (compare the ForcePartitionBuild state in LhxAggExecStream)
          // -- TODO confirm.  LhxAggExecStream has a member of the same name.
00055     uint forcePartitionLevel;
00056 
00057     // REVIEW jvs 25-Aug-2006:  When using Javadoc/doxygen comments,
00058     // it's important to remember that each comment will show up
00059     // separately in the generated class documentation.  Below,
00060     // the first comment says "Initial stats ...", which
00061     // really applies to both comments.  Instead, put a summary
00062     // at the class-level comment, like "The fields cndGroupByKeys and numRows
00063     // are provided by the optimizer to help with estimating
00064     // resource allocation requirements" and then on the field-level
00065     // comments, just say what they are, e.g. "Estimate for number of rows from
00066     // the build input, etc."  (No need to repeat the field name inside
00067     // of the field-level comments.)
          // Going by the REVIEW note above, an optimizer-provided statistic
          // used for estimating resource requirements; the name suggests the
          // cardinality of the group-by keys -- TODO confirm.
00074     RecordNum cndGroupByKeys;
00075 
          // Going by the REVIEW note above, an optimizer-provided estimate of
          // the number of input rows -- TODO confirm.
00079     RecordNum numRows;
00080 };
00081 
// Execution stream derived from ConduitExecStream and exported through
// FENNEL_HASHEXE_EXPORT.  Judging by its name and members (a hash table,
// partitions, aggregate computers) it performs hash-based aggregation
// -- TODO confirm against the implementation file.
// Everything declared before the "public:" label is private (class default).
// NOTE(review): the per-member Doxygen comments are missing from this listing
// (the embedded line numbers skip where they were); the member notes below
// are inferred from names, types and the REVIEW comments and should be
// confirmed against the original header.
00090 class FENNEL_HASHEXE_EXPORT LhxAggExecStream
00091     : public ConduitExecStream
00092 {
00093     // REVIEW jvs 26-Aug-2006:  Fennel convention for enum names is
00094     // all uppercase with underscores
00095 
          // States of this stream; aggState and nextState below hold values
          // of this type.
00096     enum LhxAggState {
00097         ForcePartitionBuild, Build, Produce, ProducePending,
00098         Partition, CreateChildPlan, GetNextPlan, Done
00099     };
00100 
          // Presumably the tuple currently read from the input -- TODO confirm.
00104     TupleData inputTuple;
00105 
          // Presumably the tuple being written to the output -- TODO confirm.
00109     TupleData outputTuple;
00110 
00111     // REVIEW jvs 25-Aug-2006:  This member is only accessed within
00112     // one method (execute).  Wouldn't it be easier to make it a local
00113     // variable there so it doesn't have to be reset?
          // Count of tuples produced; per the REVIEW note above it is used
          // only by execute().
00117     uint numTuplesProduced;
00118 
          // Hash configuration; setHashInfo() below sets it up from the exec
          // stream parameters.
00122     LhxHashInfo hashInfo;
00123 
          // The hash table and a reader over it.
00127     LhxHashTable hashTable;
00128     LhxHashTableReader hashTableReader;
00129 
          // Presumably the number of blocks available to the hash table
          // -- TODO confirm.
00133     BlockNum numBlocksHashTable;
00134 
          // Presumably the number of cache blocks needed for purposes other
          // than the hash table -- TODO confirm.
00139     BlockNum numMiscCacheBlocks;
00140 
00141     // REVIEW jvs 25-Aug-2006:  Next three fields need comments, maybe
00142     // a reference to somewhere else explaining the plan concept.  Is
00143     // it true that isTopPlan can be derived from (curPlan == rootPlan.get())?
00144 
00145     /*
00146      * Plan
00147      */
          // NOTE(review): isTopPlan presumably tells whether curPlan is the
          // root plan (see the open question in the REVIEW note above).
00148     bool isTopPlan;
00149     SharedLhxPlan rootPlan;
00150 
00151     // REVIEW jvs 25-Aug-2006: If there's a valid reason not to declare this as
00152     // a SharedLhxPlan (like performance, which seems justified), then that
00153     // reason should be explained, since in general mixing shared and
00154     // non-shared pointers can be error-prone.
          // Raw pointer to the plan currently being processed; presumably
          // non-owning and pointing into the tree held by rootPlan
          // -- TODO confirm.
00155     LhxPlan *curPlan;
00156 
00157     // REVIEW jvs 25-Aug-2006: This will always be 0, right?  In that case, use
00158     // a static const to make it obvious, and assert accordingly in ::prepare.
00159     // And then use BUILD_INPUT_INDEX naming convention.
00160 
          // Index of the build input; the REVIEW note above suspects it is
          // always 0 -- TODO confirm.
00164     uint buildInputIndex;
00165 
          // Partitioning-related state (type LhxPartitionInfo) -- semantics
          // not visible here.
00169     LhxPartitionInfo partInfo;
00170 
          // Presumably the partition holding the build input -- TODO confirm.
00174     SharedLhxPartition buildPart;
00175 
          // Presumably a reader over the build partition -- TODO confirm.
00179     LhxPartitionReader buildReader;
00180 
          // Same name as LhxAggExecStreamParams::enableSubPartStat; presumably
          // copied from the parameters in prepare() -- TODO confirm.
00184     bool enableSubPartStat;
00185 
          // Same name as LhxAggExecStreamParams::forcePartitionLevel;
          // presumably copied from the parameters in prepare() -- TODO confirm.
00190     uint forcePartitionLevel;
00191 
          // Current state of the stream (one of the LhxAggState values).
00195     LhxAggState aggState;
00196 
00197     // REVIEW jvs 25-Aug-2006: This seems fairly useless in LhxAggExecStream;
00198     // it only ever gets set to Produce.  I think it's vestigial from
00199     // LhxJoinExecStream.
          // State to move to next; per the REVIEW note above it is only ever
          // set to Produce.
00203     LhxAggState nextState;
00204 
00205 
00206     // REVIEW jvs 25-Aug-2006: This is so temporary that it is never
00207     // even referenced anywhere?
00208     /*
00209      * Some temporary variables.
00210      */
          // Per the REVIEW note above, possibly never referenced.
00211     uint groupByKeyCount;
00212 
00213     // REVIEW jvs 25-Aug-2006:  Next two fields need comments, maybe
00214     // a reference to somewhere else explaining the concept of partial
00215     // aggregation.
00216 
          // Aggregate computers and partial aggregate computers; both lists
          // are set up by setAggComputers() below.
00217     AggComputerList aggComputers;
00218     AggComputerList partialAggComputers;
00219 
00220     // implement ExecStream
00221     virtual void closeImpl();
00222 
00223     /*
00224      * Set up hashInfo from exec stream parameters.
00225      */
00226     void setHashInfo(LhxAggExecStreamParams const &params);
00227 
00228     /*
00229      * Set up the aggregate computers and partial aggregate computers used by
00230      * the hash table.
00231      */
          // hashInfo is taken by non-const reference, so it may be modified;
          // aggInvocations is read-only input.
00232     void setAggComputers(
00233         LhxHashInfo &hashInfo,
00234         AggInvocationList const &aggInvocations);
00235 
00236 public:
00237     // implement ExecStream
          // prepare() receives the stream's parameters; open() takes a
          // restart flag; execute() takes an execution quantum and returns an
          // ExecStreamResult.
00238     virtual void prepare(LhxAggExecStreamParams const &params);
00239     virtual void open(bool restart);
00240     virtual ExecStreamResult execute(ExecStreamQuantum const &quantum);
00241 
          // All three arguments are non-const references; presumably outputs
          // filled in with the minimum and optimum resource quantities and
          // the setting type of the optimum -- TODO confirm.
00242     virtual void getResourceRequirements(
00243         ExecStreamResourceQuantity &minQuantity,
00244         ExecStreamResourceQuantity &optQuantity,
00245         ExecStreamResourceSettingType &optType);
00246 
          // Presumably receives the resource quantity actually granted to
          // this stream -- TODO confirm.
00247     virtual void setResourceAllocation(
00248         ExecStreamResourceQuantity &quantity);
00249 
00250 };
00251 
00252 FENNEL_END_NAMESPACE
00253 
00254 #endif
00255 
00256 // End LhxAggExecStream.h

Generated on Mon Jun 22 04:00:19 2009 for Fennel by  doxygen 1.5.1