您的位置：首页 > 数据库

PostgreSQL在何处处理 sql查询之四十六

2013-06-04 09:19 591 查看

接前面，再上溯：set_base_rel_pathlists --> set_rel_pathlist

/*
* set_base_rel_pathlists
*      Finds all paths available for scanning each base-relation entry.
*      Sequential scan and any available indices are considered.
*      Each useful path is attached to its relation's 'pathlist' field.
*/
static void
set_base_rel_pathlists(PlannerInfo *root)
{

//fprintf(stderr, "set_base_rel_pathlists... by process %d\n",getpid());
Index        rti;

for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *rel = root->simple_rel_array[rti];

/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;

Assert(rel->relid == rti);        /* sanity check on array */

/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;

set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
}
}

再上溯：make_one_rel --> set_base_rel_pathlists

/*
* make_one_rel
*      Finds all possible access paths for executing a query, returning a
*      single rel that represents the join of all base rels in the query.
*/
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
Index        rti;

/*
* Construct the all_baserels Relids set.
*/
root->all_baserels = NULL;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];

/* there may be empty slots corresponding to non-baserel RTEs */
if (brel == NULL)
continue;

Assert(brel->relid == rti);        /* sanity check on array */

/* ignore RTEs that are "other rels" */
if (brel->reloptkind != RELOPT_BASEREL)
continue;

root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
}

/*
* Generate access paths for the base rels.
*/
set_base_rel_sizes(root);
set_base_rel_pathlists(root);

/*
* Generate access paths for the entire join tree.
*/
rel = make_rel_from_joinlist(root, joinlist);

/*
* The result should join all and only the query's base rels.
*/
Assert(bms_equal(rel->relids, root->all_baserels));

return rel;
}

再上溯：query_planner --> make_one_rel

/*
* query_planner
*      Generate a path (that is, a simplified plan) for a basic query,
*      which may involve joins but not any fancier features.
*
* Since query_planner does not handle the toplevel processing (grouping,
* sorting, etc) it cannot select the best path by itself.    It selects
* two paths: the cheapest path that produces all the required tuples,
* independent of any ordering considerations, and the cheapest path that
* produces the expected fraction of the required tuples in the required
* ordering, if there is a path that is cheaper for this than just sorting
* the output of the cheapest overall path.  The caller (grouping_planner)
* will make the final decision about which to use.
*
* Input parameters:
* root describes the query to plan
* tlist is the target list the query should produce
*        (this is NOT necessarily root->parse->targetList!)
* tuple_fraction is the fraction of tuples we expect will be retrieved
* limit_tuples is a hard limit on number of tuples to retrieve,
*        or -1 if no limit
*
* Output parameters:
* *cheapest_path receives the overall-cheapest path for the query
* *sorted_path receives the cheapest presorted path for the query,
*                if any (NULL if there is no useful presorted path)
* *num_groups receives the estimated number of groups, or 1 if query
*                does not use grouping
*
* Note: the PlannerInfo node also includes a query_pathkeys field, which is
* both an input and an output of query_planner().    The input value signals
* query_planner that the indicated sort order is wanted in the final output
* plan.  But this value has not yet been "canonicalized", since the needed
* info does not get computed until we scan the qual clauses.  We canonicalize
* it as soon as that task is done.  (The main reason query_pathkeys is a
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
* Note: the PlannerInfo node includes other pathkeys fields besides
* query_pathkeys, all of which need to be canonicalized once the info is
* available.  See canonicalize_all_pathkeys.
*
* tuple_fraction is interpreted as follows:
*      0: expect all tuples to be retrieved (normal case)
*      0 < tuple_fraction < 1: expect the given fraction of tuples available
*        from the plan to be retrieved
*      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
*        expected to be retrieved (ie, a LIMIT specification)
* Note that a nonzero tuple_fraction could come from outer context; it is
* therefore not redundant with limit_tuples.  We use limit_tuples to determine
* whether a bounded sort can be used at runtime.
*/
void
query_planner(PlannerInfo *root, List *tlist,
double tuple_fraction, double limit_tuples,
Path **cheapest_path, Path **sorted_path,
double *num_groups)
{
Query       *parse = root->parse;
List       *joinlist;
RelOptInfo *final_rel;
Path       *cheapestpath;
Path       *sortedpath;
Index        rti;
double        total_pages;

/* Make tuple_fraction, limit_tuples accessible to lower-level routines */
root->tuple_fraction = tuple_fraction;
root->limit_tuples = limit_tuples;

*num_groups = 1;            /* default result */

/*
* If the query has an empty join tree, then it's something easy like
* "SELECT 2+2;" or "INSERT ... VALUES()".    Fall through quickly.
*/
if (parse->jointree->fromlist == NIL)
{
/* We need a trivial path result */
*cheapest_path = (Path *)
create_result_path((List *) parse->jointree->quals);
*sorted_path = NULL;

/*
* We still are required to canonicalize any pathkeys, in case it's
* something like "SELECT 2+2 ORDER BY 1".
*/
root->canon_pathkeys = NIL;
canonicalize_all_pathkeys(root);
return;
}

/*
* Init planner lists to empty.
*
* NOTE: append_rel_list was set up by subquery_planner, so do not touch
* here; eq_classes and minmax_aggs may contain data already, too.
*/
root->join_rel_list = NIL;
root->join_rel_hash = NULL;
root->join_rel_level = NULL;
root->join_cur_level = 0;
root->canon_pathkeys = NIL;
root->left_join_clauses = NIL;
root->right_join_clauses = NIL;
root->full_join_clauses = NIL;
root->join_info_list = NIL;
root->placeholder_list = NIL;
root->initial_rels = NIL;

/*
* Make a flattened version of the rangetable for faster access (this is
* OK because the rangetable won't change any more), and set up an empty
* array for indexing base relations.
*/
setup_simple_rel_arrays(root);

/*
* Construct RelOptInfo nodes for all base relations in query, and
* indirectly for all appendrel member relations ("other rels").  This
* will give us a RelOptInfo for every "simple" (non-join) rel involved in
* the query.
*
* Note: the reason we find the rels by searching the jointree and
* appendrel list, rather than just scanning the rangetable, is that the
* rangetable may contain RTEs for rels not actively part of the query,
* for example views.  We don't want to make RelOptInfos for them.
*/
add_base_rels_to_query(root, (Node *) parse->jointree);

/*
* Examine the targetlist and join tree, adding entries to baserel
* targetlists for all referenced Vars, and generating PlaceHolderInfo
* entries for all referenced PlaceHolderVars.    Restrict and join clauses
* are added to appropriate lists belonging to the mentioned relations. We
* also build EquivalenceClasses for provably equivalent expressions. The
* SpecialJoinInfo list is also built to hold information about join order
* restrictions.  Finally, we form a target joinlist for make_one_rel() to
* work from.
*/
build_base_rel_tlists(root, tlist);

find_placeholders_in_jointree(root);

joinlist = deconstruct_jointree(root);

/*
* Reconsider any postponed outer-join quals now that we have built up
* equivalence classes.  (This could result in further additions or
* mergings of classes.)
*/
reconsider_outer_join_clauses(root);

/*
* If we formed any equivalence classes, generate additional restriction
* clauses as appropriate.    (Implied join clauses are formed on-the-fly
* later.)
*/
generate_base_implied_equalities(root);

/*
* We have completed merging equivalence sets, so it's now possible to
* convert previously generated pathkeys (in particular, the requested
* query_pathkeys) to canonical form.
*/
canonicalize_all_pathkeys(root);

/*
* Examine any "placeholder" expressions generated during subquery pullup.
* Make sure that the Vars they need are marked as needed at the relevant
* join level.    This must be done before join removal because it might
* cause Vars or placeholders to be needed above a join when they weren't
* so marked before.
*/
fix_placeholder_input_needed_levels(root);

/*
* Remove any useless outer joins.    Ideally this would be done during
* jointree preprocessing, but the necessary information isn't available
* until we've built baserel data structures and classified qual clauses.
*/
joinlist = remove_useless_joins(root, joinlist);

/*
* Now distribute "placeholders" to base rels as needed.  This has to be
* done after join removal because removal could change whether a
* placeholder is evaluatable at a base rel.
*/
add_placeholders_to_base_rels(root);

/*
* We should now have size estimates for every actual table involved in
* the query, and we also know which if any have been deleted from the
* query by join removal; so we can compute total_table_pages.
*
* Note that appendrels are not double-counted here, even though we don't
* bother to distinguish RelOptInfos for appendrel parents, because the
* parents will still have size zero.
*
* XXX if a table is self-joined, we will count it once per appearance,
* which perhaps is the wrong thing ... but that's not completely clear,
* and detecting self-joins here is difficult, so ignore it for now.
*/
total_pages = 0;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];

if (brel == NULL)
continue;

Assert(brel->relid == rti);        /* sanity check on array */

if (brel->reloptkind == RELOPT_BASEREL ||
brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
total_pages += (double) brel->pages;
}
root->total_table_pages = total_pages;

/*
* Ready to do the primary planning.
*/
final_rel = make_one_rel(root, joinlist);

if (!final_rel || !final_rel->cheapest_total_path)
elog(ERROR, "failed to construct the join relation");

/*
* If there's grouping going on, estimate the number of result groups. We
* couldn't do this any earlier because it depends on relation size
* estimates that were set up above.
*
* Then convert tuple_fraction to fractional form if it is absolute, and
* adjust it based on the knowledge that grouping_planner will be doing
* grouping or aggregation work with our result.
*
* This introduces some undesirable coupling between this code and
* grouping_planner, but the alternatives seem even uglier; we couldn't
* pass back completed paths without making these decisions here.
*/
if (parse->groupClause)
{
List       *groupExprs;

groupExprs = get_sortgrouplist_exprs(parse->groupClause,
parse->targetList);
*num_groups = estimate_num_groups(root,
groupExprs,
final_rel->rows);

/*
* In GROUP BY mode, an absolute LIMIT is relative to the number of
* groups not the number of tuples.  If the caller gave us a fraction,
* keep it as-is.  (In both cases, we are effectively assuming that
* all the groups are about the same size.)
*/
if (tuple_fraction >= 1.0)
tuple_fraction /= *num_groups;

/*
* If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY.    Likewise if we
* have both DISTINCT and GROUP BY, or if we have a window
* specification not compatible with the GROUP BY.
*/
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
!pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;

/* In any case, limit_tuples shouldn't be specified here */
Assert(limit_tuples < 0);
}
else if (parse->hasAggs || root->hasHavingQual)
{
/*
* Ungrouped aggregate will certainly want to read all the tuples, and
* it will deliver a single result row (so leave *num_groups 1).
*/
tuple_fraction = 0.0;

/* limit_tuples shouldn't be specified here */
Assert(limit_tuples < 0);
}
else if (parse->distinctClause)
{
/*
* Since there was no grouping or aggregation, it's reasonable to
* assume the UNIQUE filter has effects comparable to GROUP BY. Return
* the estimated number of output rows for use by caller. (If DISTINCT
* is used with grouping, we ignore its effects for rowcount
* estimation purposes; this amounts to assuming the grouped rows are
* distinct already.)
*/
List       *distinctExprs;

distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
parse->targetList);
*num_groups = estimate_num_groups(root,
distinctExprs,
final_rel->rows);

/*
* Adjust tuple_fraction the same way as for GROUP BY, too.
*/
if (tuple_fraction >= 1.0)
tuple_fraction /= *num_groups;

/* limit_tuples shouldn't be specified here */
Assert(limit_tuples < 0);
}
else
{
/*
* Plain non-grouped, non-aggregated query: an absolute tuple fraction
* can be divided by the number of tuples.
*/
if (tuple_fraction >= 1.0)
tuple_fraction /= final_rel->rows;
}

/*
* Pick out the cheapest-total path and the cheapest presorted path for
* the requested pathkeys (if there is one).  We should take the tuple
* fraction into account when selecting the cheapest presorted path, but
* not when selecting the cheapest-total path, since if we have to sort
* then we'll have to fetch all the tuples.  (But there's a special case:
* if query_pathkeys is NIL, meaning order doesn't matter, then the
* "cheapest presorted" path will be the cheapest overall for the tuple
* fraction.)
*
* The cheapest-total path is also the one to use if grouping_planner
* decides to use hashed aggregation, so we return it separately even if
* this routine thinks the presorted path is the winner.
*/
cheapestpath = final_rel->cheapest_total_path;

sortedpath =
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
root->query_pathkeys,
NULL,
tuple_fraction);

/* Don't return same path in both guises; just wastes effort */
if (sortedpath == cheapestpath)
sortedpath = NULL;

/*
* Forget about the presorted path if it would be cheaper to sort the
* cheapest-total path.  Here we need consider only the behavior at the
* tuple fraction point.
*/
if (sortedpath)
{
Path        sort_path;    /* dummy for result of cost_sort */

if (root->query_pathkeys == NIL ||
pathkeys_contained_in(root->query_pathkeys,
cheapestpath->pathkeys))
{
/* No sort needed for cheapest path */
sort_path.startup_cost = cheapestpath->startup_cost;
sort_path.total_cost = cheapestpath->total_cost;
}
else
{
/* Figure cost for sorting */
cost_sort(&sort_path, root, root->query_pathkeys,
cheapestpath->total_cost,
final_rel->rows, final_rel->width,
0.0, work_mem, limit_tuples);
}

if (compare_fractional_path_costs(sortedpath, &sort_path,
tuple_fraction) > 0)
{
/* Presorted path is a loser */
sortedpath = NULL;
}
}

*cheapest_path = cheapestpath;
*sorted_path = sortedpath;
}

接下来从 query_planner 开始再上溯：

/*--------------------
* grouping_planner
*      Perform planning steps related to grouping, aggregation, etc.
*      This primarily means adding top-level processing to the basic
*      query plan produced by query_planner.
*
* tuple_fraction is the fraction of tuples we expect will be retrieved
*
* tuple_fraction is interpreted as follows:
*      0: expect all tuples to be retrieved (normal case)
*      0 < tuple_fraction < 1: expect the given fraction of tuples available
*        from the plan to be retrieved
*      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
*        expected to be retrieved (ie, a LIMIT specification)
*
* Returns a query plan.  Also, root->query_pathkeys is returned as the
* actual output ordering of the plan (in pathkey format).
*--------------------
*/
static Plan *
grouping_planner(PlannerInfo *root, double tuple_fraction)
{
Query       *parse = root->parse;
List       *tlist = parse->targetList;
int64        offset_est = 0;
int64        count_est = 0;
double        limit_tuples = -1.0;
Plan       *result_plan;
List       *current_pathkeys;
double        dNumGroups = 0;
bool        use_hashed_distinct = false;
bool        tested_hashed_distinct = false;

/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
{
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);

/*
* If we have a known LIMIT, and don't have an unknown OFFSET, we can
* estimate the effects of using a bounded sort.
*/
if (count_est > 0 && offset_est >= 0)
limit_tuples = (double) count_est + (double) offset_est;
}

if (parse->setOperations)
{
List       *set_sortclauses;

/*
* If there's a top-level ORDER BY, assume we have to fetch all the
* tuples.    This might be too simplistic given all the hackery below
* to possibly avoid the sort; but the odds of accurate estimates here
* are pretty low anyway.
*/
if (parse->sortClause)
tuple_fraction = 0.0;

/*
* Construct the plan for set operations.  The result will not need
* any work except perhaps a top-level sort and/or LIMIT.  Note that
* any special work for recursive unions is the responsibility of
* plan_set_operations.
*/
result_plan = plan_set_operations(root, tuple_fraction,
&set_sortclauses);

/*
* Calculate pathkeys representing the sort order (if any) of the set
* operation's result.  We have to do this before overwriting the sort
* key information...
*/
current_pathkeys = make_pathkeys_for_sortclauses(root,
set_sortclauses,
result_plan->targetlist,
true);

/*
* We should not need to call preprocess_targetlist, since we must be
* in a SELECT query node.    Instead, use the targetlist returned by
* plan_set_operations (since this tells whether it returned any
* resjunk columns!), and transfer any sort key information from the
* original tlist.
*/
Assert(parse->commandType == CMD_SELECT);

tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
tlist);

/*
* Can't handle FOR UPDATE/SHARE here (parser should have checked
* already, but let's make sure).
*/
if (parse->rowMarks)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));

/*
* Calculate pathkeys that represent result ordering requirements
*/
Assert(parse->distinctClause == NIL);
root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
parse->sortClause,
tlist,
true);
}
else
{
/* No set operations, do regular planning */
List       *sub_tlist;
double        sub_limit_tuples;
AttrNumber *groupColIdx = NULL;
bool        need_tlist_eval = true;
Path       *cheapest_path;
Path       *sorted_path;
Path       *best_path;
long        numGroups = 0;
AggClauseCosts agg_costs;
int            numGroupCols;
double        path_rows;
int            path_width;
bool        use_hashed_grouping = false;
WindowFuncLists *wflists = NULL;
List       *activeWindows = NIL;

MemSet(&agg_costs, 0, sizeof(AggClauseCosts));

/* A recursive query should always have setOperations */
Assert(!root->hasRecursion);

/* Preprocess GROUP BY clause, if any */
if (parse->groupClause)
preprocess_groupclause(root);
numGroupCols = list_length(parse->groupClause);

/* Preprocess targetlist */
tlist = preprocess_targetlist(root, tlist);

/*
* Locate any window functions in the tlist.  (We don't need to look
* anywhere else, since expressions used in ORDER BY will be in there
* too.)  Note that they could all have been eliminated by constant
* folding, in which case we don't need to do any more work.
*/
if (parse->hasWindowFuncs)
{
wflists = find_window_functions((Node *) tlist,
list_length(parse->windowClause));
if (wflists->numWindowFuncs > 0)
activeWindows = select_active_windows(root, wflists);
else
parse->hasWindowFuncs = false;
}

/*
* Generate appropriate target list for subplan; may be different from
* tlist if grouping or aggregation is needed.
*/
sub_tlist = make_subplanTargetList(root, tlist,
&groupColIdx, &need_tlist_eval);

/*
* Do aggregate preprocessing, if the query has any aggs.
*
* Note: think not that we can turn off hasAggs if we find no aggs. It
* is possible for constant-expression simplification to remove all
* explicit references to aggs, but we still have to follow the
* aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
{
/*
* Collect statistics about aggregates for estimating costs. Note:
* we do not attempt to detect duplicate aggregates here; a
* somewhat-overestimated cost is okay for our present purposes.
*/
count_agg_clauses(root, (Node *) tlist, &agg_costs);
count_agg_clauses(root, parse->havingQual, &agg_costs);

/*
* Preprocess MIN/MAX aggregates, if any.  Note: be careful about
* adding logic between here and the optimize_minmax_aggregates
* call.  Anything that is needed in MIN/MAX-optimizable cases
* will have to be duplicated in planagg.c.
*/
preprocess_minmax_aggregates(root, tlist);
}

/*
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed.    The sortClause is
* certainly sort-able, but GROUP BY and DISTINCT might not be, in
* which case we just leave their pathkeys empty.
*/
if (parse->groupClause &&
grouping_is_sortable(parse->groupClause))
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->groupClause,
tlist,
false);
else
root->group_pathkeys = NIL;

/* We consider only the first (bottom) window in pathkeys logic */
if (activeWindows != NIL)
{
WindowClause *wc = (WindowClause *) linitial(activeWindows);

root->window_pathkeys = make_pathkeys_for_window(root,
wc,
tlist,
false);
}
else
root->window_pathkeys = NIL;

if (parse->distinctClause &&
grouping_is_sortable(parse->distinctClause))
root->distinct_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->distinctClause,
tlist,
false);
else
root->distinct_pathkeys = NIL;

root->sort_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->sortClause,
tlist,
false);

/*
* Figure out whether we want a sorted result from query_planner.
*
* If we have a sortable GROUP BY clause, then we want a result sorted
* properly for grouping.  Otherwise, if we have window functions to
* evaluate, we try to sort for the first window.  Otherwise, if
* there's a sortable DISTINCT clause that's more rigorous than the
* ORDER BY clause, we try to produce output that's sufficiently well
* sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
* clause, we want to sort by the ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
* superset of GROUP BY, it would be tempting to request sort by ORDER
* BY --- but that might just leave us failing to exploit an available
* sort order at all.  Needs more thought.    The choice for DISTINCT
* versus ORDER BY is much easier, since we know that the parser
* ensured that one is a superset of the other.
*/
if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
else if (root->window_pathkeys)
root->query_pathkeys = root->window_pathkeys;
else if (list_length(root->distinct_pathkeys) >
list_length(root->sort_pathkeys))
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
root->query_pathkeys = NIL;

/*
* Figure out whether there's a hard limit on the number of rows that
* query_planner's result subplan needs to return.  Even if we know a
* hard limit overall, it doesn't apply if the query has any
* grouping/aggregation operations.
*/
if (parse->groupClause ||
parse->distinctClause ||
parse->hasAggs ||
parse->hasWindowFuncs ||
root->hasHavingQual)
sub_limit_tuples = -1.0;
else
sub_limit_tuples = limit_tuples;

/*
* Generate the best unsorted and presorted paths for this Query (but
* note there may not be any presorted path).  query_planner will also
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);

/*
* Extract rowcount and width estimates for possible use in grouping
* decisions.  Beware here of the possibility that
* cheapest_path->parent is NULL (ie, there is no FROM clause).
*/
if (cheapest_path->parent)
{
path_rows = cheapest_path->parent->rows;
path_width = cheapest_path->parent->width;
}
else
{
path_rows = 1;        /* assume non-set result */
path_width = 100;    /* arbitrary */
}

if (parse->groupClause)
{
/*
* If grouping, decide whether to use sorted or hashed grouping.
*/
use_hashed_grouping =
choose_hashed_grouping(root,
tuple_fraction, limit_tuples,
path_rows, path_width,
cheapest_path, sorted_path,
dNumGroups, &agg_costs);
/* Also convert # groups to long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
}
else if (parse->distinctClause && sorted_path &&
!root->hasHavingQual && !parse->hasAggs && !activeWindows)
{
/*
* We'll reach the DISTINCT stage without any intermediate
* processing, so figure out whether we will want to hash or not
* so we can choose whether to use cheapest or sorted path.
*/
use_hashed_distinct =
choose_hashed_distinct(root,
tuple_fraction, limit_tuples,
path_rows, path_width,
cheapest_path->startup_cost,
cheapest_path->total_cost,
sorted_path->startup_cost,
sorted_path->total_cost,
sorted_path->pathkeys,
dNumGroups);
tested_hashed_distinct = true;
}

/*
* Select the best path.  If we are doing hashed grouping, we will
* always read all the input tuples, so use the cheapest-total path.
* Otherwise, trust query_planner's decision about which to use.
*/
if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
best_path = cheapest_path;
else
best_path = sorted_path;

/*
* Check to see if it's possible to optimize MIN/MAX aggregates. If
* so, we will forget all the work we did so far to choose a "regular"
* path ... but we had to do it anyway to be able to tell which way is
* cheaper.
*/
result_plan = optimize_minmax_aggregates(root,
tlist,
&agg_costs,
best_path);
if (result_plan != NULL)
{
/*
* optimize_minmax_aggregates generated the full plan, with the
* right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
}
else
{
/*
* Normal case --- create a plan according to query_planner's
* results.
*/
bool        need_sort_for_grouping = false;

result_plan = create_plan(root, best_path);
current_pathkeys = best_path->pathkeys;

/* Detect if we'll need an explicit sort for grouping */
if (parse->groupClause && !use_hashed_grouping &&
!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
need_sort_for_grouping = true;

/*
* Always override create_plan's tlist, so that we don't sort
* useless data from a "physical" tlist.
*/
need_tlist_eval = true;
}

/*
* create_plan returns a plan with just a "flat" tlist of required
* Vars.  Usually we need to insert the sub_tlist as the tlist of
* the top plan node.  However, we can skip that if we determined
* that whatever create_plan chose to return will be good enough.
*/
if (need_tlist_eval)
{
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(root,
sub_tlist,
NULL,
result_plan);
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
*/
result_plan->targetlist = sub_tlist;
}

/*
* Also, account for the cost of evaluation of the sub_tlist.
* See comments for add_tlist_costs_to_plan() for more info.
*/
add_tlist_costs_to_plan(root, result_plan, sub_tlist);
}
else
{
/*
* Since we're using create_plan's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*/
locate_grouping_columns(root, tlist, result_plan->targetlist,
groupColIdx);
}

/*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(root,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
&agg_costs,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
numGroups,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;

if (parse->groupClause)
{
if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = root->group_pathkeys;
}
aggstrategy = AGG_SORTED;

/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
}
else
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}

result_plan = (Plan *) make_agg(root,
tlist,
(List *) parse->havingQual,
aggstrategy,
&agg_costs,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
numGroups,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus
* the appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come out
* the way the GROUP node needs it.
*/
if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = root->group_pathkeys;
}

result_plan = (Plan *) make_group(root,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
extract_grouping_ops(parse->groupClause),
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else if (root->hasHavingQual)
{
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM
* table at all!  We can just throw away the plan-so-far and
* generate a Result node.    This is a sufficiently unusual
* corner case that it's not worth contorting the structure of
* this routine to avoid having to generate the plan in the
* first place.
*/
result_plan = (Plan *) make_result(root,
tlist,
parse->havingQual,
NULL);
}
}                        /* end of non-minmax-aggregate case */

/*
* Since each window function could require a different sort order, we
* stack up a WindowAgg node for each window, with sort steps between
* them as needed.
*/
if (activeWindows)
{
List       *window_tlist;
ListCell   *l;

/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the desired
* tlist.  (In some cases this might not really be required, but
* it's not worth trying to avoid it.)  Note that on second and
* subsequent passes through the following loop, the top-level
* node will be a WindowAgg which we know can project; so we only
* need to check once.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(root,
NIL,
NULL,
result_plan);
}

/*
* The "base" targetlist for all steps of the windowing process is
* a flat tlist of all Vars and Aggs needed in the result.  (In
* some cases we wouldn't need to propagate all of these all the
* way to the top, since they might only be needed as inputs to
* WindowFuncs.  It's probably not worth trying to optimize that
* though.)  We also add window partitioning and sorting
* expressions to the base tlist, to ensure they're computed only
* once at the bottom of the stack (that's critical for volatile
* functions).  As we climb up the stack, we'll add outputs for
* the WindowFuncs computed at each level.
*/
window_tlist = make_windowInputTargetList(root,
tlist,
activeWindows);

/*
* The copyObject steps here are needed to ensure that each plan
* node has a separately modifiable tlist.  (XXX wouldn't a
* shallow list copy do for that?)
*/
result_plan->targetlist = (List *) copyObject(window_tlist);

foreach(l, activeWindows)
{
WindowClause *wc = (WindowClause *) lfirst(l);
List       *window_pathkeys;
int            partNumCols;
AttrNumber *partColIdx;
Oid           *partOperators;
int            ordNumCols;
AttrNumber *ordColIdx;
Oid           *ordOperators;

window_pathkeys = make_pathkeys_for_window(root,
wc,
tlist,
true);

/*
* This is a bit tricky: we build a sort node even if we don't
* really have to sort.  Even when no explicit sort is needed,
* we need to have suitable resjunk items added to the input
* plan's tlist for any partitioning or ordering columns that
* aren't plain Vars.  (In theory, make_windowInputTargetList
* should have provided all such columns, but let's not assume
* that here.)  Furthermore, this way we can use existing
* infrastructure to identify which input columns are the
* interesting ones.
*/
if (window_pathkeys)
{
Sort       *sort_plan;

sort_plan = make_sort_from_pathkeys(root,
result_plan,
window_pathkeys,
-1.0);
if (!pathkeys_contained_in(window_pathkeys,
current_pathkeys))
{
/* we do indeed need to sort */
result_plan = (Plan *) sort_plan;
current_pathkeys = window_pathkeys;
}
/* In either case, extract the per-column information */
get_column_info_for_window(root, wc, tlist,
sort_plan->numCols,
sort_plan->sortColIdx,
&partNumCols,
&partColIdx,
&partOperators,
&ordNumCols,
&ordColIdx,
&ordOperators);
}
else
{
/* empty window specification, nothing to sort */
partNumCols = 0;
partColIdx = NULL;
partOperators = NULL;
ordNumCols = 0;
ordColIdx = NULL;
ordOperators = NULL;
}

if (lnext(l))
{
/* Add the current WindowFuncs to the running tlist */
window_tlist = add_to_flat_tlist(window_tlist,
wflists->windowFuncs[wc->winref]);
}
else
{
/* Install the original tlist in the topmost WindowAgg */
window_tlist = tlist;
}

/* ... and make the WindowAgg plan node */
result_plan = (Plan *)
make_windowagg(root,
(List *) copyObject(window_tlist),
wflists->windowFuncs[wc->winref],
wc->winref,
partNumCols,
partColIdx,
partOperators,
ordNumCols,
ordColIdx,
ordOperators,
wc->frameOptions,
wc->startOffset,
wc->endOffset,
result_plan);
}
}
}                            /* end of if (setOperations) */

/*
* If there is a DISTINCT clause, add the necessary node(s).
*/
if (parse->distinctClause)
{
double        dNumDistinctRows;
long        numDistinctRows;

/*
* If there was grouping or aggregation, use the current number of
* rows as the estimated number of DISTINCT rows (ie, assume the
* result was already mostly unique).  If not, use the number of
* distinct-groups calculated by query_planner.
*/
if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
dNumDistinctRows = result_plan->plan_rows;
else
dNumDistinctRows = dNumGroups;

/* Also convert to long int --- but 'ware overflow! */
numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);

/* Choose implementation method if we didn't already */
if (!tested_hashed_distinct)
{
/*
* At this point, either hashed or sorted grouping will have to
* work from result_plan, so we pass that as both "cheapest" and
* "sorted".
*/
use_hashed_distinct =
choose_hashed_distinct(root,
tuple_fraction, limit_tuples,
result_plan->plan_rows,
result_plan->plan_width,
result_plan->startup_cost,
result_plan->total_cost,
result_plan->startup_cost,
result_plan->total_cost,
current_pathkeys,
dNumDistinctRows);
}

if (use_hashed_distinct)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(root,
result_plan->targetlist,
NIL,
AGG_HASHED,
NULL,
list_length(parse->distinctClause),
extract_grouping_cols(parse->distinctClause,
result_plan->targetlist),
extract_grouping_ops(parse->distinctClause),
numDistinctRows,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else
{
/*
* Use a Unique node to implement DISTINCT.  Add an explicit sort
* if we couldn't make the path come out the way the Unique node
* needs it.  If we do have to sort, always sort by the more
* rigorous of DISTINCT and ORDER BY, to avoid a second sort
* below.  However, for regular DISTINCT, don't sort now if we
* don't have to --- sorting afterwards will likely be cheaper,
* and also has the possibility of optimizing via LIMIT.  But for
* DISTINCT ON, we *must* force the final sort now, else it won't
* have the desired behavior.
*/
List       *needed_pathkeys;

if (parse->hasDistinctOn &&
list_length(root->distinct_pathkeys) <
list_length(root->sort_pathkeys))
needed_pathkeys = root->sort_pathkeys;
else
needed_pathkeys = root->distinct_pathkeys;

if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
{
if (list_length(root->distinct_pathkeys) >=
list_length(root->sort_pathkeys))
current_pathkeys = root->distinct_pathkeys;
else
{
current_pathkeys = root->sort_pathkeys;
/* Assert checks that parser didn't mess up... */
Assert(pathkeys_contained_in(root->distinct_pathkeys,
current_pathkeys));
}

result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
current_pathkeys,
-1.0);
}

result_plan = (Plan *) make_unique(result_plan,
parse->distinctClause);
result_plan->plan_rows = dNumDistinctRows;
/* The Unique node won't change sort ordering */
}
}

/*
* If ORDER BY was given and we were not able to make the plan come out in
* the right order, add an explicit sort step.
*/
if (parse->sortClause)
{
if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
root->sort_pathkeys,
limit_tuples);
current_pathkeys = root->sort_pathkeys;
}
}

/*
* If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
* intentionally test parse->rowMarks not root->rowMarks here. If there
* are only non-locking rowmarks, they should be handled by the
* ModifyTable node instead.)
*/
if (parse->rowMarks)
{
result_plan = (Plan *) make_lockrows(result_plan,
root->rowMarks,
SS_assign_special_param(root));

/*
* The result can no longer be assumed sorted, since locking might
* cause the sort key columns to be replaced with new values.
*/
current_pathkeys = NIL;
}

/*
* Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
*/
if (parse->limitCount || parse->limitOffset)
{
result_plan = (Plan *) make_limit(result_plan,
parse->limitOffset,
parse->limitCount,
offset_est,
count_est);
}

/*
* Return the actual output ordering in query_pathkeys for possible use by
* an outer query level.
*/
root->query_pathkeys = current_pathkeys;

return result_plan;
}

简化：

/*--------------------
* grouping_planner
*      Perform planning steps related to grouping, aggregation, etc.
*      This primarily means adding top-level processing to the basic
*      query plan produced by query_planner.
*
* tuple_fraction is the fraction of tuples we expect will be retrieved
*
* tuple_fraction is interpreted as follows:
*      0: expect all tuples to be retrieved (normal case)
*      0 < tuple_fraction < 1: expect the given fraction of tuples available
*        from the plan to be retrieved
*      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
*        expected to be retrieved (ie, a LIMIT specification)
*
* Returns a query plan.  Also, root->query_pathkeys is returned as the
* actual output ordering of the plan (in pathkey format).
*--------------------
*/
static Plan *
grouping_planner(PlannerInfo *root, double tuple_fraction)
{
...

if (parse->setOperations)
{
...
}
else
{
...
/*
* Generate the best unsorted and presorted paths for this Query (but
* note there may not be any presorted path).  query_planner will also
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);

...
}                            /* end of if (setOperations) */    ...

return result_plan;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航