//--------------------------------------------------------------------------- // Greenplum Database // Copyright (C) 2012 EMC Corp. // // @filename: // CQueryMutators.cpp // // @doc: // Implementation of methods used during translating a GPDB Query object into a // DXL Tree // // @test: // //--------------------------------------------------------------------------- extern "C" { #include "postgres.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/parsenodes.h" #include "nodes/plannodes.h" /* optimizer/walkers.h not present in PG18 — walker macros defined inline */ #define query_or_expression_tree_walker(n,w,c,f) \ query_or_expression_tree_walker_impl(n,(bool(*)(Node*,void*))w,c,f) #define expression_tree_walker(n,w,c) \ expression_tree_walker_impl(n,(bool(*)(Node*,void*))w,c) #define query_tree_walker(q,w,c,f) \ query_tree_walker_impl(q,(bool(*)(Node*,void*))w,c,f) #define expression_tree_mutator(n,m,c) \ expression_tree_mutator_impl(n,(Node*(*)(Node*,void*))m,c) #define query_tree_mutator(q,m,c,f) \ query_tree_mutator_impl(q,(Node*(*)(Node*,void*))m,c,f) #define query_or_expression_tree_mutator(n,m,c,f) \ query_or_expression_tree_mutator_impl(n,(Node*(*)(Node*,void*))m,c,f) } #include "gpopt/base/CUtils.h" #include "gpopt/gpdbwrappers.h" #include "gpopt/mdcache/CMDAccessor.h" #include "gpopt/mdcache/CMDAccessorUtils.h" #include "gpopt/translate/CQueryMutators.h" #include "gpopt/translate/CTranslatorDXLToPlStmt.h" #include "naucrates/exception.h" #include "naucrates/md/IMDAggregate.h" #include "naucrates/md/IMDScalarOp.h" #include "naucrates/md/IMDTypeBool.h" using namespace gpdxl; using namespace gpmd; //--------------------------------------------------------------------------- // @function: // CQueryMutators::NeedsProjListNormalization // // @doc: // Is the group by project list flat (contains only aggregates, grouping // funcs, and grouping columns) //--------------------------------------------------------------------------- BOOL 
CQueryMutators::NeedsProjListNormalization(const Query *query) { if (!query->hasAggs && nullptr == query->groupClause && nullptr == query->groupingSets) { return false; } SContextTLWalker context(query->targetList, query->groupClause); ListCell *lc = nullptr; ForEach(lc, query->targetList) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); if (ShouldFallback((Node *) target_entry->expr, &context)) { // TODO: remove temporary fix (revert exception to assert) to avoid crash during algebrization GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLError, GPOS_WSZ_LIT("No attribute")); } // Normalize when there is an expression that is neither used for grouping // nor is an aggregate function if (!IsA(target_entry->expr, Aggref) && !IsA(target_entry->expr, GroupingFunc) && !CTranslatorUtils::IsGroupingColumn((Node *) target_entry->expr, query->groupClause, query->targetList)) { return true; } } return false; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::ShouldFallback // // @doc: // Fall back when the target list refers to a attribute which algebrizer // at this point cannot resolve //--------------------------------------------------------------------------- BOOL CQueryMutators::ShouldFallback(Node *node, SContextTLWalker *context) { if (nullptr == node) { return false; } if (IsA(node, Const) || IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, SubLink)) { return false; } TargetEntry *entry = gpdb::FindFirstMatchingMemberInTargetList( node, context->m_target_entries); if (nullptr != entry && CTranslatorUtils::IsGroupingColumn( (Node *) entry->expr, context->m_group_clause, context->m_target_entries)) { return false; } if (IsA(node, SubLink)) { return false; } if (IsA(node, Var)) { Var *var = (Var *) node; if (0 == var->varlevelsup) { // if we reach a Var that was not a grouping column then there is an equivalent column // which the algebrizer at this point cannot resolve // example: consider 
two table r(a,b) and s(c,d) and the following query // SELECT a from r LEFT JOIN s on (r.a = s.c) group by r.a // In the query object, generated by the parse, the output columns refer to the output of // the left outer join while the grouping column refers to the base table column. // While r.a and a are equivalent, the algebrizer at this point cannot detect this. // Therefore, we fall back. return true; } return false; } return gpdb::WalkExpressionTree( node, (ExprWalkerFn) CQueryMutators::ShouldFallback, context); } //--------------------------------------------------------------------------- // @function: // CQueryMutators::NormalizeGroupByProjList // // @doc: // Flatten expressions in project list to contain only aggregates, grouping // funcs and grouping columns // ORGINAL QUERY: // SELECT * from r where r.a > (SELECT max(c) + min(d) FROM t where r.b = t.e) // NEW QUERY: // SELECT * from r where r.a > (SELECT x1+x2 as x3 // FROM (SELECT max(c) as x2, min(d) as x2 // FROM t where r.b = t.e) t2) //--------------------------------------------------------------------------- Query * CQueryMutators::NormalizeGroupByProjList(CMemoryPool *mp, CMDAccessor *md_accessor, const Query *query) { Query *query_copy = (Query *) gpdb::CopyObject(const_cast(query)); if (!NeedsProjListNormalization(query_copy)) { return query_copy; } Query *new_query = ConvertToDerivedTable(query_copy, false /*should_fix_target_list*/, true /*should_fix_having_qual*/); gpdb::GPDBFree(query_copy); GPOS_ASSERT(1 == gpdb::ListLength(new_query->rtable)); Query *derived_table_query = (Query *) ((RangeTblEntry *) gpdb::ListNth(new_query->rtable, 0)) ->subquery; SContextGrpbyPlMutator context(mp, md_accessor, derived_table_query, nullptr); List *target_list_copy = (List *) gpdb::CopyObject(derived_table_query->targetList); ListCell *lc = nullptr; // first normalize grouping columns ForEach(lc, target_list_copy) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); GPOS_ASSERT(nullptr != 
target_entry); if (CTranslatorUtils::IsGroupingColumn( target_entry, derived_table_query->groupClause)) { target_entry->expr = (Expr *) FixGroupingCols( (Node *) target_entry->expr, target_entry, &context); } } lc = nullptr; // normalize remaining project elements ForEach(lc, target_list_copy) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); GPOS_ASSERT(nullptr != target_entry); BOOL is_grouping_col = CTranslatorUtils::IsGroupingColumn( target_entry, derived_table_query->groupClause); if (!is_grouping_col) { target_entry->expr = (Expr *) RunExtractAggregatesMutator( (Node *) target_entry->expr, &context); GPOS_ASSERT(!IsA(target_entry->expr, Aggref) && "New target list entry should not contain any Aggrefs"); GPOS_ASSERT( !IsA(target_entry->expr, GroupingFunc) && "New target list entry should not contain any GroupingFuncs"); } } derived_table_query->targetList = context.m_lower_table_tlist; new_query->targetList = target_list_copy; ReassignSortClause(new_query, derived_table_query); return new_query; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::RunIncrLevelsUpMutator // // @doc: // Increment any the query levels up of any outer reference by one //--------------------------------------------------------------------------- Node * CQueryMutators::RunIncrLevelsUpMutator(Node *node, SContextIncLevelsupMutator *context) { if (nullptr == node) { return nullptr; } if (IsA(node, Var)) { Var *var = (Var *) gpdb::CopyObject(node); // Consider the following use case: // ORGINAL QUERY: // SELECT * from r where r.a > (SELECT max(c) + min(d) // FROM t where r.b = t.e) // NEW QUERY: // SELECT * from r where r.a > (SELECT x1+x2 as x3 // FROM (SELECT max(c) as x2, min(d) as x2 // FROM t where r.b = t.e) t2) // // In such a scenario, we need increment the levels up for the // correlation variable r.b in the subquery by 1. 
if (var->varlevelsup > context->m_current_query_level) { var->varlevelsup++; return (Node *) var; } return (Node *) var; } if (IsA(node, TargetEntry) && 0 == context->m_current_query_level && !context->m_should_fix_top_level_target_list) { return (Node *) gpdb::CopyObject(node); } // recurse into query structure if (IsA(node, Query)) { context->m_current_query_level++; Query *query = gpdb::MutateQueryTree( (Query *) node, (MutatorWalkerFn) CQueryMutators::RunIncrLevelsUpMutator, context, 0 // flags ); context->m_current_query_level--; return (Node *) query; } return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) CQueryMutators::RunIncrLevelsUpMutator, context); } //--------------------------------------------------------------------------- // CQueryMutators::RunFixCTELevelsUpWalker // // Increment CTE range table reference by one if it refers to // an ancestor of the original Query node (level 0 in the context) //--------------------------------------------------------------------------- BOOL CQueryMutators::RunFixCTELevelsUpWalker(Node *node, SContextIncLevelsupMutator *context) { if (nullptr == node) { return false; } if (IsA(node, RangeTblEntry)) { RangeTblEntry *rte = (RangeTblEntry *) node; if (RTE_CTE == rte->rtekind && rte->ctelevelsup >= context->m_current_query_level) { // fix the levels up for CTE range table entry when needed // the walker in GPDB does not walk range table entries of type CTE rte->ctelevelsup++; } // always return false, as we want to continue fixing up RTEs return false; } // recurse into query structure, incrementing the query level if (IsA(node, Query)) { context->m_current_query_level++; BOOL result = gpdb::WalkQueryTree( (Query *) node, (ExprWalkerFn) CQueryMutators::RunFixCTELevelsUpWalker, context, QTW_EXAMINE_RTES_BEFORE // flags - visit RTEs ); context->m_current_query_level--; return result; } if (IsA(node, TargetEntry) && !context->m_should_fix_top_level_target_list && 0 == context->m_current_query_level) { // skip the 
top-level target list, if requested return false; } return gpdb::WalkExpressionTree( node, (ExprWalkerFn) CQueryMutators::RunFixCTELevelsUpWalker, context); } //--------------------------------------------------------------------------- // @function: // CQueryMutators::RunGroupingColMutator // // @doc: // Mutate the grouping columns, fix levels up when necessary // //--------------------------------------------------------------------------- Node * CQueryMutators::RunGroupingColMutator(Node *node, SContextGrpbyPlMutator *context) { if (nullptr == node) { return nullptr; } if (IsA(node, Const)) { return (Node *) gpdb::CopyObject(node); } if (IsA(node, Var)) { Var *var_copy = (Var *) gpdb::CopyObject(node); if (var_copy->varlevelsup > context->m_current_query_level) { var_copy->varlevelsup++; } return (Node *) var_copy; } if (IsA(node, Aggref)) { // merely fix the arguments of an aggregate Aggref *old_aggref = (Aggref *) node; Aggref *aggref = FlatCopyAggref(old_aggref); aggref->agglevelsup = old_aggref->agglevelsup; List *new_args = NIL; ListCell *lc = nullptr; BOOL is_agg = context->m_is_mutating_agg_arg; context->m_is_mutating_agg_arg = true; ForEach(lc, old_aggref->args) { Node *arg = (Node *) gpdb::CopyObject((Node *) lfirst(lc)); GPOS_ASSERT(nullptr != arg); // traverse each argument and fix levels up when needed new_args = gpdb::LAppend( new_args, gpdb::MutateQueryOrExpressionTree( arg, (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, (void *) context, 0 // flags -- mutate into cte-lists )); } context->m_is_mutating_agg_arg = is_agg; aggref->args = new_args; return (Node *) aggref; } if (IsA(node, GroupingFunc)) { // FIXME: we do not fix levelsup for GroupingFunc here, the translator // will fall back later when it detects levelsup > 0. 
We need to do // similar things as AggRef here when ORCA adds support for GroupingFunc // with outer refs return (Node *) gpdb::CopyObject(node); } if (IsA(node, SubLink)) { SubLink *old_sublink = (SubLink *) node; SubLink *new_sublink = MakeNode(SubLink); new_sublink->subLinkType = old_sublink->subLinkType; new_sublink->location = old_sublink->location; new_sublink->operName = (List *) gpdb::CopyObject(old_sublink->operName); new_sublink->testexpr = gpdb::MutateQueryOrExpressionTree( old_sublink->testexpr, (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, (void *) context, 0 // flags -- mutate into cte-lists ); context->m_current_query_level++; GPOS_ASSERT(IsA(old_sublink->subselect, Query)); // One need to call the Query mutator for subselect and take into // account that SubLink can be multi-level. Therefore, the // context->m_current_query_level must be modified properly // while diving into such nested SubLink. new_sublink->subselect = RunGroupingColMutator(old_sublink->subselect, context); context->m_current_query_level--; return (Node *) new_sublink; } if (IsA(node, CommonTableExpr)) { CommonTableExpr *cte = (CommonTableExpr *) gpdb::CopyObject(node); context->m_current_query_level++; GPOS_ASSERT(IsA(cte->ctequery, Query)); cte->ctequery = gpdb::MutateQueryOrExpressionTree( cte->ctequery, (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, (void *) context, 0 // flags --- mutate into cte-lists ); context->m_current_query_level--; return (Node *) cte; } // recurse into query structure if (IsA(node, Query)) { Query *query = gpdb::MutateQueryTree( (Query *) node, (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, context, 1 // flag -- do not mutate range table entries ); // fix the outer reference in derived table entries List *rtable = query->rtable; ListCell *lc = nullptr; ForEach(lc, rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); if (RTE_SUBQUERY == rte->rtekind) { Query *subquery = rte->subquery; // since we did not walk 
inside derived tables context->m_current_query_level++; rte->subquery = (Query *) RunGroupingColMutator((Node *) subquery, context); context->m_current_query_level--; gpdb::GPDBFree(subquery); } } return (Node *) query; } return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, context); } //--------------------------------------------------------------------------- // @function: // CQueryMutators::FixGroupingCols // // @doc: // Mutate the grouping columns, fix levels up when necessary //--------------------------------------------------------------------------- Node * CQueryMutators::FixGroupingCols(Node *node, TargetEntry *orginal_target_entry, SContextGrpbyPlMutator *context) { GPOS_ASSERT(nullptr != node); ULONG arity = gpdb::ListLength(context->m_lower_table_tlist) + 1; // fix any outer references in the grouping column expression Node *expr = (Node *) RunGroupingColMutator(node, context); CHAR *name = CQueryMutators::GetTargetEntryColName(orginal_target_entry, context->m_query); TargetEntry *new_target_entry = gpdb::MakeTargetEntry( (Expr *) expr, (AttrNumber) arity, name, false /*resjunk */); new_target_entry->ressortgroupref = orginal_target_entry->ressortgroupref; new_target_entry->resjunk = false; context->m_lower_table_tlist = gpdb::LAppend(context->m_lower_table_tlist, new_target_entry); Var *new_var = gpdb::MakeVar( 1, // varno (AttrNumber) arity, gpdb::ExprType((Node *) orginal_target_entry->expr), gpdb::ExprTypeMod((Node *) orginal_target_entry->expr), 0 // query levelsup ); return (Node *) new_var; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::GetTargetEntryForAggExpr // // @doc: // Return a target entry for an aggregate expression //--------------------------------------------------------------------------- TargetEntry * CQueryMutators::GetTargetEntryForAggExpr(CMemoryPool *mp, CMDAccessor *md_accessor, Node *node, ULONG attno) { 
GPOS_ASSERT(IsA(node, Aggref) || IsA(node, GroupingFunc)); // get the function/aggregate name CHAR *name = nullptr; if (IsA(node, GroupingFunc)) { name = CTranslatorUtils::CreateMultiByteCharStringFromWCString( GPOS_WSZ_LIT("grouping")); } else { Aggref *aggref = (Aggref *) node; CMDIdGPDB *agg_mdid = GPOS_NEW(mp) CMDIdGPDB(IMDId::EmdidGeneral, aggref->aggfnoid); const IMDAggregate *md_agg = md_accessor->RetrieveAgg(agg_mdid); agg_mdid->Release(); const CWStringConst *str = md_agg->Mdname().GetMDName(); name = CTranslatorUtils::CreateMultiByteCharStringFromWCString( str->GetBuffer()); } GPOS_ASSERT(nullptr != name); return gpdb::MakeTargetEntry((Expr *) node, (AttrNumber) attno, name, false); } // Traverse the entire tree under an arbitrarily complex project element (node) // to extract all aggregate functions out into the derived query's target list // // This mutator should be called after creating a derived query (a subquery in // the FROM clause), on each element in the old query's target list or qual to // update any AggRef, GroupingFunc & Var to refer to the output from the derived // query. // // See comments below & in the callers for specific use cases. Node * CQueryMutators::RunExtractAggregatesMutator(Node *node, SContextGrpbyPlMutator *context) { if (nullptr == node) { return nullptr; } if (IsA(node, Const)) { return (Node *) gpdb::CopyObject(node); } if (IsA(node, Aggref)) { Aggref *old_aggref = (Aggref *) node; // If the agglevelsup matches the current query level, this Aggref only // uses vars from the top level query. This needs to be moved to the // derived query, and the entire AggRef replaced with a Var referencing the // derived table's target list. 
if (old_aggref->agglevelsup == context->m_current_query_level) { Aggref *new_aggref = FlatCopyAggref(old_aggref); BOOL is_agg_old = context->m_is_mutating_agg_arg; ULONG agg_levels_up = context->m_agg_levels_up; context->m_is_mutating_agg_arg = true; context->m_agg_levels_up = old_aggref->agglevelsup; List *new_args = NIL; ListCell *lc = nullptr; ForEach(lc, old_aggref->args) { Node *arg = (Node *) lfirst(lc); GPOS_ASSERT(nullptr != arg); // traverse each argument and fix levels up when needed new_args = gpdb::LAppend( new_args, gpdb::MutateQueryOrExpressionTree( arg, (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) context, 0 // mutate into cte-lists )); } new_aggref->args = new_args; context->m_is_mutating_agg_arg = is_agg_old; context->m_agg_levels_up = agg_levels_up; // create a new entry in the derived table and return its corresponding var return (Node *) MakeVarInDerivedTable((Node *) new_aggref, context); } } if (0 == context->m_current_query_level) { if (IsA(node, Var) && context->m_is_mutating_agg_arg) { // This mutator may be run on a nested query object with aggregates on // outer references. 
It pulls out any aggregates and moves it into the // derived query (which is subquery), in effect, increasing the levels up // any Var in the aggregate must now reference // // e.g SELECT (SELECT sum(o.o) + 1 FROM i GRP BY i.i) FROM o; // becomes SELECT (SELECT x + 1 FROM (SELECT sum(o.o) GRP BY i.i)) FROM o; // which means Var::varlevelup must be increased for o.o return (Node *) IncrLevelsUpIfOuterRef((Var *) node); } if (IsA(node, GroupingFunc)) { // create a new entry in the derived table and return its corresponding var Node *node_copy = (Node *) gpdb::CopyObject(node); return (Node *) MakeVarInDerivedTable(node_copy, context); } if (!context->m_is_mutating_agg_arg) { // check if an entry already exists, if so no need for duplicate Node *found_node = FindNodeInGroupByTargetList(node, context); if (nullptr != found_node) { return found_node; } } } if (IsA(node, Var)) { Var *var = (Var *) gpdb::CopyObject(node); // Handle other top-level outer references in the project element. if (var->varlevelsup == context->m_current_query_level) { if (var->varlevelsup >= context->m_agg_levels_up) { // If Var references the top level query (varlevelsup = m_current_query_level) // inside an Aggref that also references top level query, the Aggref is moved // to the derived query (see comments in Aggref if-case above). // And, therefore, if we are mutating such Vars inside the Aggref, we must // change their varlevelsup field in order to preserve correct reference level. // i.e these Vars are pulled up as the part of the Aggref by the m_agg_levels_up. // e.g: // select (select max((select foo.a))) from foo; // is transformed into // select (select fnew.max_t) // from (select max((select foo.a)) max_t from foo) fnew; // Here the foo.a inside max referenced top level RTE foo at // varlevelsup = 2 inside the Aggref at agglevelsup 1. Then the // Aggref is brought up to the top-query-level of fnew and foo.a // inside Aggref is bumped up by original Aggref's level. 
// We may visualize that logic with the following diagram: // Query <------┐ <--------------------┐ // | | // | m_agg_levels_up = 1 | // | | // Aggref --┘ | varlevelsup = 2 // | // | // | // Var -------------------------┘ var->varlevelsup -= context->m_agg_levels_up; return (Node *) var; } // Skip vars inside Aggrefs, since they have already been fixed when they // were moved into the derived query in ConvertToDerivedTable(), and thus, // the relative varno, varattno & varlevelsup should still be valid. // e.g: // SELECT foo.b+1, avg(( SELECT bar.f FROM bar // WHERE bar.d = foo.b)) AS t // FROM foo GROUP BY foo.b; // is transformed into // SELECT fnew.b+1, fnew.avg_t // FROM (SELECT foo.b,`avg(( SELECT bar.f FROM bar // WHERE bar.d = foo.b)) AS t // FROM foo) fnew; // // Note the foo.b outerref in subquery inside the avg() aggregation. // Because it is inside the aggregation, it was pushed down along with // the aggregate function, and thus does not need to be fixed. if (context->m_is_mutating_agg_arg) { return (Node *) var; } // For other top-level references, correct their varno & varattno, since // they now must refer to the target list of the derived query - whose // target list may be different from the original query. // Set varlevelsup to 0 temporarily while searching in the target list var->varlevelsup = 0; TargetEntry *found_tle = gpdb::FindFirstMatchingMemberInTargetList( (Node *) var, context->m_lower_table_tlist); if (nullptr == found_tle) { // Consider two table r(a,b) and s(c,d) and the following query // SELECT 1 from r LEFT JOIN s on (r.a = s.c) group by r.a having count(*) > a // The having clause refers to the output of the left outer join while the // grouping column refers to the base table column. // While r.a and a are equivalent, the algebrizer at this point cannot detect this. // Therefore, found_target_entry will be NULL and we fall back. 
// TODO: Oct 14 2013, remove temporary fix (revert exception to assert) to avoid crash during algebrization GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLError, GPOS_WSZ_LIT("No attribute")); return nullptr; } var->varno = 1; // derived query is the only table in FROM expression var->varattno = found_tle->resno; var->varlevelsup = context->m_current_query_level; // reset varlevels up found_tle->resjunk = false; return (Node *) var; } return (Node *) var; } if (IsA(node, CommonTableExpr)) { CommonTableExpr *cte = (CommonTableExpr *) gpdb::CopyObject(node); context->m_current_query_level++; GPOS_ASSERT(IsA(cte->ctequery, Query)); cte->ctequery = gpdb::MutateQueryOrExpressionTree( cte->ctequery, (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) context, 0 // mutate into cte-lists ); context->m_current_query_level--; return (Node *) cte; } if (IsA(node, SubLink)) { SubLink *old_sublink = (SubLink *) node; SubLink *new_sublink = MakeNode(SubLink); new_sublink->subLinkType = old_sublink->subLinkType; new_sublink->location = old_sublink->location; new_sublink->operName = (List *) gpdb::CopyObject(old_sublink->operName); new_sublink->testexpr = gpdb::MutateQueryOrExpressionTree( old_sublink->testexpr, (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) context, 0 // mutate into cte-lists ); context->m_current_query_level++; GPOS_ASSERT(IsA(old_sublink->subselect, Query)); // One need to call the Query mutator for subselect and take into // account that SubLink can be multi-level. Therefore, the // context->m_current_query_level must be modified properly // while diving into such nested SubLink. new_sublink->subselect = RunExtractAggregatesMutator(old_sublink->subselect, context); context->m_current_query_level--; return (Node *) new_sublink; } if (IsA(node, Query)) { // Mutate Query tree and ignore rtable subqueries in order to modify // m_current_query_level properly when mutating them below. 
Query *query = gpdb::MutateQueryTree( (Query *) node, (MutatorWalkerFn) RunExtractAggregatesMutator, context, QTW_IGNORE_RT_SUBQUERIES); ListCell *lc; ForEach(lc, query->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); if (RTE_SUBQUERY == rte->rtekind) { Query *subquery = rte->subquery; context->m_current_query_level++; rte->subquery = (Query *) RunExtractAggregatesMutator( (Node *) subquery, context); context->m_current_query_level--; gpdb::GPDBFree(subquery); } } return (Node *) query; } return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) RunExtractAggregatesMutator, context); } // Create a new entry in the derived table and return its corresponding var Var * CQueryMutators::MakeVarInDerivedTable(Node *node, SContextGrpbyPlMutator *context) { GPOS_ASSERT(nullptr != node); GPOS_ASSERT(nullptr != context); GPOS_ASSERT(IsA(node, Aggref) || IsA(node, Var) || IsA(node, GroupingFunc)); // Append a new target entry for the node to the derived target list ... const ULONG attno = gpdb::ListLength(context->m_lower_table_tlist) + 1; TargetEntry *tle = nullptr; if (IsA(node, Aggref) || IsA(node, GroupingFunc)) { tle = GetTargetEntryForAggExpr(context->m_mp, context->m_mda, node, attno); } else if (IsA(node, Var)) { tle = gpdb::MakeTargetEntry((Expr *) node, (AttrNumber) attno, nullptr, false); } context->m_lower_table_tlist = gpdb::LAppend(context->m_lower_table_tlist, tle); // ... and return a Var referring to it in its stead // NB: Since the new tle is appended at the top query level, Var::varlevelsup // should equal the current nested level. This will take care of any outer references // to the original tlist. 
Var *new_var = gpdb::MakeVar(1 /* varno */, attno, gpdb::ExprType((Node *) node), gpdb::ExprTypeMod((Node *) node), context->m_current_query_level /* varlevelsup */); return new_var; } // Check if a matching entry already exists in the list of target // entries, if yes return its corresponding var, otherwise return NULL Node * CQueryMutators::FindNodeInGroupByTargetList(Node *node, SContextGrpbyPlMutator *context) { GPOS_ASSERT(nullptr != node); GPOS_ASSERT(nullptr != context); TargetEntry *found_tle = gpdb::FindFirstMatchingMemberInTargetList( node, context->m_lower_table_tlist); if (nullptr != found_tle) { gpdb::GPDBFree(node); // NB: Var::varlevelsup is set to the current query level since the created // Var must reference the group by targetlist at the top level. Var *new_var = gpdb::MakeVar(1 /* varno */, found_tle->resno, gpdb::ExprType((Node *) found_tle->expr), gpdb::ExprTypeMod((Node *) found_tle->expr), context->m_current_query_level /* varlevelsup */); found_tle->resjunk = false; return (Node *) new_var; } return nullptr; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::FlatCopyAggref // // @doc: // Make a copy of the aggref (minus the arguments) //--------------------------------------------------------------------------- Aggref * CQueryMutators::FlatCopyAggref(Aggref *old_aggref) { Aggref *new_aggref = MakeNode(Aggref); *new_aggref = *old_aggref; new_aggref->agglevelsup = 0; // This is not strictly necessary: we seem to ALWAYS assgin to args from // the callers // Explicitly setting this both to be safe and to be clear that we are // intentionally NOT copying the args new_aggref->args = NIL; return new_aggref; } // Increment the levels up of outer references Var * CQueryMutators::IncrLevelsUpIfOuterRef(Var *var) { GPOS_ASSERT(nullptr != var); Var *var_copy = (Var *) gpdb::CopyObject(var); if (0 != var_copy->varlevelsup) { var_copy->varlevelsup++; } return var_copy; } 
//--------------------------------------------------------------------------- // @function: // CQueryMutators::NormalizeHaving // // @doc: // Pull up having qual into a select and fix correlated references // to the top-level query //--------------------------------------------------------------------------- Query * CQueryMutators::NormalizeHaving(CMemoryPool *mp, CMDAccessor *md_accessor, const Query *query) { Query *query_copy = (Query *) gpdb::CopyObject(const_cast(query)); if (nullptr == query->havingQual) { return query_copy; } Query *new_query = ConvertToDerivedTable(query_copy, true /*should_fix_target_list*/, false /*should_fix_having_qual*/); gpdb::GPDBFree(query_copy); RangeTblEntry *rte = ((RangeTblEntry *) gpdb::ListNth(new_query->rtable, 0)); Query *derived_table_query = (Query *) rte->subquery; // Add all necessary target list entries of subquery // into the target list of the RTE as well as the new top most query ListCell *lc = nullptr; ULONG num_target_entries = 1; ForEach(lc, derived_table_query->targetList) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); GPOS_ASSERT(nullptr != target_entry); // Add to the target lists: // (1) All grouping / sorting columns even if they do not appear in the subquery output (resjunked) // (2) All non-resjunked target list entries if (CTranslatorUtils::IsGroupingColumn( target_entry, derived_table_query->groupClause) || CTranslatorUtils::IsSortingColumn( target_entry, derived_table_query->sortClause) || !target_entry->resjunk) { TargetEntry *new_target_entry = MakeTopLevelTargetEntry(target_entry, num_target_entries); new_query->targetList = gpdb::LAppend(new_query->targetList, new_target_entry); // Ensure that such target entries is not suppressed in the target list of the RTE // and has a name target_entry->resname = GetTargetEntryColName(target_entry, derived_table_query); target_entry->resjunk = false; new_target_entry->ressortgroupref = target_entry->ressortgroupref; num_target_entries++; } } 
SContextGrpbyPlMutator context(mp, md_accessor, derived_table_query, derived_table_query->targetList); // fix outer references in the qual new_query->jointree->quals = RunExtractAggregatesMutator(derived_table_query->havingQual, &context); derived_table_query->havingQual = nullptr; ReassignSortClause(new_query, rte->subquery); if (!rte->subquery->hasAggs && NIL == rte->subquery->groupClause && NIL == rte->subquery->groupingSets) { // if the derived table has no grouping columns or aggregates then the // subquery is equivalent to select XXXX FROM CONST-TABLE // (where XXXX is the original subquery's target list) Query *new_subquery = MakeNode(Query); new_subquery->commandType = CMD_SELECT; new_subquery->targetList = NIL; new_subquery->hasAggs = false; new_subquery->hasWindowFuncs = false; new_subquery->hasSubLinks = false; ListCell *lc = nullptr; ForEach(lc, rte->subquery->targetList) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); GPOS_ASSERT(nullptr != target_entry); GPOS_ASSERT(!target_entry->resjunk); new_subquery->targetList = gpdb::LAppend(new_subquery->targetList, (TargetEntry *) gpdb::CopyObject(target_entry)); } gpdb::GPDBFree(rte->subquery); rte->subquery = new_subquery; rte->subquery->jointree = MakeNode(FromExpr); rte->subquery->groupClause = NIL; rte->subquery->groupingSets = NIL; rte->subquery->sortClause = NIL; rte->subquery->windowClause = NIL; } return new_query; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::NormalizeQuery // // @doc: // Normalize queries with having and group by clauses //--------------------------------------------------------------------------- /* Mutator context for RTE_GROUP Var replacement */ struct FlattenGroupRTEContext { Query *query; }; static Node * FlattenGroupRTEMutator(Node *node, void *ctx) { if (node == nullptr) return nullptr; if (IsA(node, Var)) { Var *var = (Var *) node; struct FlattenGroupRTEContext *context = (struct 
FlattenGroupRTEContext *) ctx; Query *query = context->query; if (var->varlevelsup == 0 && var->varno >= 1 && var->varno <= (unsigned int) list_length(query->rtable)) { RangeTblEntry *rte = (RangeTblEntry *) list_nth(query->rtable, var->varno - 1); if (rte->rtekind == RTE_GROUP) { /* varattno is 1-based index into groupexprs */ int idx = var->varattno - 1; if (idx >= 0 && idx < list_length(rte->groupexprs)) { Node *grp_expr = (Node *) list_nth(rte->groupexprs, idx); /* deep-copy and recurse in case the group expr * itself references another RTE_GROUP */ return FlattenGroupRTEMutator( (Node *) copyObject(grp_expr), ctx); } } } return (Node *) copyObject(var); } return expression_tree_mutator(node, FlattenGroupRTEMutator, ctx); } /* Replace Vars that reference RTE_GROUP entries with the underlying * grouping expressions. PG17+ adds an RTE_GROUP entry to the rtable for * queries with GROUP BY; ORCA does not know about this RTE kind, so we * must expand it before translating the query tree. 
*/
static Query *
FlattenGroupRTEVars(Query *query)
{
	/* nothing to expand when the query carries no RTE_GROUP entry; the
	 * input pointer itself is returned in that case */
	if (!query->hasGroupRTE)
	{
		return query;
	}

	struct FlattenGroupRTEContext ctx;
	ctx.query = query;

	/* NOTE(review): QTW_DONT_COPY_QUERY presumably makes the mutator update
	 * the top-level Query node in place instead of copying it -- confirm
	 * against the PostgreSQL nodeFuncs documentation */
	return (Query *) query_tree_mutator(query, FlattenGroupRTEMutator, &ctx,
										QTW_DONT_COPY_QUERY);
}

// Normalize a query before translation by chaining, in this order:
//   1. FlattenGroupRTEVars       (PG17+ RTE_GROUP Var expansion)
//   2. gpdb::FlattenJoinAliasVar (join alias vars of the current level)
//   3. EliminateDistinctClause
//   4. NormalizeWindowProjList
//   5. NormalizeHaving
//   6. NormalizeGroupByProjList
// The Query produced by each of steps 2-5 is released with gpdb::GPDBFree
// once the following step has produced its own result; the result of step 6
// is returned to the caller.
Query *
CQueryMutators::NormalizeQuery(CMemoryPool *mp, CMDAccessor *md_accessor,
							   const Query *query, ULONG query_level)
{
	/* PG17+: expand RTE_GROUP Vars before further normalization.
	 * The result is not freed below: when the query has no RTE_GROUP it is
	 * the caller's own Query.
	 * NOTE(review): the const qualifier is cast away here and the query is
	 * handed to a mutator run with QTW_DONT_COPY_QUERY -- confirm callers
	 * tolerate the input being rewritten. */
	Query *pqueryFlattenedGroup =
		FlattenGroupRTEVars(const_cast<Query *>(query));

	// flatten join alias vars defined at the current level of the query
	Query *pqueryResolveJoinVarReferences =
		gpdb::FlattenJoinAliasVar(pqueryFlattenedGroup, query_level);

	// eliminate distinct clause
	Query *pqueryEliminateDistinct =
		CQueryMutators::EliminateDistinctClause(pqueryResolveJoinVarReferences);
	GPOS_ASSERT(nullptr == pqueryEliminateDistinct->distinctClause);
	gpdb::GPDBFree(pqueryResolveJoinVarReferences);

	// normalize window operator's project list
	Query *pqueryWindowPlNormalized = CQueryMutators::NormalizeWindowProjList(
		mp, md_accessor, pqueryEliminateDistinct);
	gpdb::GPDBFree(pqueryEliminateDistinct);

	// pull-up having quals into a select
	Query *pqueryHavingNormalized = CQueryMutators::NormalizeHaving(
		mp, md_accessor, pqueryWindowPlNormalized);
	GPOS_ASSERT(nullptr == pqueryHavingNormalized->havingQual);
	gpdb::GPDBFree(pqueryWindowPlNormalized);

	// normalize the group by project list
	Query *new_query = CQueryMutators::NormalizeGroupByProjList(
		mp, md_accessor, pqueryHavingNormalized);
	gpdb::GPDBFree(pqueryHavingNormalized);

	return new_query;
}

//---------------------------------------------------------------------------
//	@function:
//		CQueryMutators::MakeTopLevelTargetEntry
//
//	@doc:
//		Given a target list entry in the derived table, create a new
//		TargetEntry to be added to the top level query.
This function allocates // memory //--------------------------------------------------------------------------- TargetEntry * CQueryMutators::MakeTopLevelTargetEntry(TargetEntry *old_target_entry, ULONG attno) { Var *new_var = gpdb::MakeVar( 1, (AttrNumber) attno, gpdb::ExprType((Node *) old_target_entry->expr), gpdb::ExprTypeMod((Node *) old_target_entry->expr), 0 // query levelsup ); TargetEntry *new_target_entry = gpdb::MakeTargetEntry( (Expr *) new_var, (AttrNumber) attno, old_target_entry->resname, old_target_entry->resjunk); return new_target_entry; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::GetTargetEntryColName // // @doc: // Return the column name of the target list entry //--------------------------------------------------------------------------- CHAR * CQueryMutators::GetTargetEntryColName(TargetEntry *target_entry, Query * /*query*/) { if (nullptr != target_entry->resname) { return target_entry->resname; } // Since a resjunked target list entry will not have a column name create a dummy column name CWStringConst dummy_colname(GPOS_WSZ_LIT("?column?")); return CTranslatorUtils::CreateMultiByteCharStringFromWCString( dummy_colname.GetBuffer()); } //--------------------------------------------------------------------------- // CQueryMutators::ConvertToDerivedTable // // Convert "original_query" into two nested Query structs // and return the new upper query. 
// // upper_query // original_query ===> | // lower_query // // - The result lower Query has: // * The original rtable, join tree, groupClause, WindowClause, etc., // with modified varlevelsup and ctelevelsup fields, as needed // * The original targetList, either modified or unmodified, // depending on should_fix_target_list // * The original havingQual, either modified or unmodified, // depending on should_fix_having_qual // // - The result upper Query has: // * a single RTE, pointing to the lower query // * the CTE list of the original query // (CTE levels in original have been fixed up) // * an empty target list // //--------------------------------------------------------------------------- Query * CQueryMutators::ConvertToDerivedTable(const Query *original_query, BOOL should_fix_target_list, BOOL should_fix_having_qual) { // Step 1: Make a copy of the original Query, this will become the lower query Query *query_copy = (Query *) gpdb::CopyObject(const_cast(original_query)); // Step 2: Remove things from the query copy that will go in the new, upper Query object // or won't be modified Node *having_qual = nullptr; if (!should_fix_having_qual) { having_qual = query_copy->havingQual; query_copy->havingQual = nullptr; } List *original_cte_list = query_copy->cteList; query_copy->cteList = NIL; /* intoPolicy and parentStmtType are GPDB-only Query fields not in PG18 */ // Step 3: fix outer references and CTE levels // increment varlevelsup in the lower query where they point to a Query // that is an ancestor of the original query Query *lower_query; { SContextIncLevelsupMutator context1(0, should_fix_target_list); lower_query = gpdb::MutateQueryTree( query_copy, (MutatorWalkerFn) RunIncrLevelsUpMutator, &context1, 0 // flags ); } // fix the CTE levels up -- while the old query is converted into a derived table, its cte list // is re-assigned to the new top-level query. 
The references to the ctes listed in the old query // as well as those listed before the current query level are accordingly adjusted in the new // derived table. { SContextIncLevelsupMutator context2(0 /*starting level */, should_fix_target_list); (void) gpdb::WalkQueryOrExpressionTree( (Node *) lower_query, (ExprWalkerFn) RunFixCTELevelsUpWalker, &context2, QTW_EXAMINE_RTES_BEFORE); } if (nullptr != having_qual) { lower_query->havingQual = having_qual; } // Step 4: Create a new, single range table entry for the upper query RangeTblEntry *rte = MakeNode(RangeTblEntry); rte->rtekind = RTE_SUBQUERY; rte->subquery = lower_query; rte->inFromCl = true; rte->subquery->cteList = NIL; // create a new range table reference for the new RTE RangeTblRef *rtref = MakeNode(RangeTblRef); rtref->rtindex = 1; // Step 5: Create a new upper query with the new RTE in its from clause Query *upper_query = MakeNode(Query); upper_query->cteList = original_cte_list; upper_query->rtable = gpdb::LAppend(upper_query->rtable, rte); /* intoPolicy / parentStmtType are GPDB-only — skipped in PG18 */ FromExpr *fromexpr = MakeNode(FromExpr); fromexpr->quals = nullptr; fromexpr->fromlist = gpdb::LAppend(fromexpr->fromlist, rtref); upper_query->jointree = fromexpr; upper_query->commandType = CMD_SELECT; GPOS_ASSERT(1 == gpdb::ListLength(upper_query->rtable)); GPOS_ASSERT(false == upper_query->hasWindowFuncs); return upper_query; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::EliminateDistinctClause // // @doc: // Eliminate distinct columns by translating it into a grouping columns //--------------------------------------------------------------------------- Query * CQueryMutators::EliminateDistinctClause(const Query *query) { if (0 == gpdb::ListLength(query->distinctClause)) { return (Query *) gpdb::CopyObject(const_cast(query)); } // create a derived table out of the previous query Query *new_query = ConvertToDerivedTable(query, true 
/*should_fix_target_list*/, true /*should_fix_having_qual*/); GPOS_ASSERT(1 == gpdb::ListLength(new_query->rtable)); Query *derived_table_query = (Query *) ((RangeTblEntry *) gpdb::ListNth(new_query->rtable, 0)) ->subquery; ReassignSortClause(new_query, derived_table_query); new_query->targetList = NIL; List *target_entries = derived_table_query->targetList; ListCell *lc = nullptr; // build the project list of the new top-level query ForEach(lc, target_entries) { ULONG resno = gpdb::ListLength(new_query->targetList) + 1; TargetEntry *target_entry = (TargetEntry *) lfirst(lc); GPOS_ASSERT(nullptr != target_entry); if (!target_entry->resjunk) { // create a new target entry that points to the corresponding entry in the derived table Var *new_var = gpdb::MakeVar(1, target_entry->resno, gpdb::ExprType((Node *) target_entry->expr), gpdb::ExprTypeMod((Node *) target_entry->expr), 0 // query levels up ); TargetEntry *new_target_entry = gpdb::MakeTargetEntry((Expr *) new_var, (AttrNumber) resno, target_entry->resname, false); new_target_entry->ressortgroupref = target_entry->ressortgroupref; new_query->targetList = gpdb::LAppend(new_query->targetList, new_target_entry); } if (0 < target_entry->ressortgroupref && !CTranslatorUtils::IsGroupingColumn( target_entry, derived_table_query->groupClause) && !CTranslatorUtils::IsReferencedInWindowSpec( target_entry, derived_table_query->windowClause)) { // initialize the ressortgroupref of target entries not used in the grouping clause target_entry->ressortgroupref = 0; } } if (gpdb::ListLength(new_query->targetList) != gpdb::ListLength(query->distinctClause)) { GPOS_RAISE( gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT( "DISTINCT operation on a subset of target list columns")); } ListCell *pl = nullptr; ForEach(pl, query->distinctClause) { SortGroupClause *sort_group_clause = (SortGroupClause *) lfirst(pl); GPOS_ASSERT(nullptr != sort_group_clause); SortGroupClause *new_sort_group_clause = 
MakeNode(SortGroupClause); new_sort_group_clause->tleSortGroupRef = sort_group_clause->tleSortGroupRef; new_sort_group_clause->eqop = sort_group_clause->eqop; new_sort_group_clause->sortop = sort_group_clause->sortop; new_sort_group_clause->nulls_first = sort_group_clause->nulls_first; new_query->groupClause = gpdb::LAppend(new_query->groupClause, new_sort_group_clause); } new_query->distinctClause = NIL; derived_table_query->distinctClause = NIL; return new_query; } //--------------------------------------------------------------------------- // CQueryMutators::NeedsProjListWindowNormalization // // Check whether the window operator's project list only contains // window functions, vars, or expressions used in the window specification. // Examples of queries that will be normalized: // select rank() over(...) -1 // select rank() over(order by a), a+b // select (SQ), rank over(...) // Some of these, e.g. the second one, may not strictly need normalization. //--------------------------------------------------------------------------- BOOL CQueryMutators::NeedsProjListWindowNormalization(const Query *query) { if (!query->hasWindowFuncs) { return false; } ListCell *lc = nullptr; ForEach(lc, query->targetList) { TargetEntry *target_entry = (TargetEntry *) lfirst(lc); if (!CTranslatorUtils::IsReferencedInWindowSpec(target_entry, query->windowClause) && !IsA(target_entry->expr, WindowFunc) && !IsA(target_entry->expr, Var)) { // computed columns in the target list that is not // used in the order by or partition by of the window specification(s) return true; } } return false; } //--------------------------------------------------------------------------- // CQueryMutators::NormalizeWindowProjList // // Flatten expressions in project list to contain only window functions, // columns (vars) and columns (vars) used in the window specifications. 
// This is a restriction in Orca and DXL, that we don't support a mix of // window functions and general expressions in a target list. // // ORGINAL QUERY: // SELECT row_number() over() + rank() over(partition by a+b order by a-b) from foo // // NEW QUERY: // SELECT rn+rk from (SELECT row_number() over() as rn, rank() over(partition by a+b order by a-b) as rk FROM foo) foo_new //--------------------------------------------------------------------------- Query * CQueryMutators::NormalizeWindowProjList(CMemoryPool *mp, CMDAccessor *md_accessor, const Query *original_query) { Query *query_copy = (Query *) gpdb::CopyObject(const_cast(original_query)); if (!NeedsProjListWindowNormalization(original_query)) { return query_copy; } // we assume here that we have already performed the transformGroupedWindows() // transformation, which separates GROUP BY from window functions GPOS_ASSERT(nullptr == original_query->distinctClause); GPOS_ASSERT(nullptr == original_query->groupClause); GPOS_ASSERT(nullptr == original_query->groupingSets); // we do not fix target list of the derived table since we will be mutating it below // to ensure that it does not have window functions Query *upper_query = ConvertToDerivedTable(query_copy, false /*should_fix_target_list*/, true /*should_fix_having_qual*/); gpdb::GPDBFree(query_copy); GPOS_ASSERT(1 == gpdb::ListLength(upper_query->rtable)); Query *lower_query = (Query *) ((RangeTblEntry *) gpdb::ListNth(upper_query->rtable, 0)) ->subquery; SContextGrpbyPlMutator projlist_context(mp, md_accessor, lower_query, nullptr); ListCell *lc = nullptr; List *target_entries = lower_query->targetList; ForEach(lc, target_entries) { // If this target entry is referenced in a window spec, is a var or is a window function, // add it to the lower target list. Adjust the outer refs to ancestors of the orginal // query by adding one to the varlevelsup. Add a var to the upper target list to refer // to it. 
// // Any other target entries, add them to the upper target list, and ensure that any vars // they reference in the current scope are produced by the lower query and are adjusted // to refer to the new, single RTE of the upper query. TargetEntry *target_entry = (TargetEntry *) lfirst(lc); const ULONG ulResNoNew = gpdb::ListLength(upper_query->targetList) + 1; if (CTranslatorUtils::IsReferencedInWindowSpec( target_entry, original_query->windowClause)) { // This entry is used in a window spec. Since this clause refers to its argument by // ressortgroupref, the target entry must be preserved in the lower target list, // so insert the entire Expr of the TargetEntry into the lower target list, using the // same ressortgroupref and also preserving the resjunk attribute. SContextIncLevelsupMutator level_context( 0, true /* should_fix_top_level_target_list */); TargetEntry *lower_target_entry = (TargetEntry *) gpdb::MutateExpressionTree( (Node *) target_entry, (MutatorWalkerFn) RunIncrLevelsUpMutator, &level_context); lower_target_entry->resno = gpdb::ListLength(projlist_context.m_lower_table_tlist) + 1; projlist_context.m_lower_table_tlist = gpdb::LAppend( projlist_context.m_lower_table_tlist, lower_target_entry); BOOL is_sorting_col = CTranslatorUtils::IsSortingColumn( target_entry, original_query->sortClause); if (!target_entry->resjunk || is_sorting_col) { // the target list entry is present in the query output or it is used in the ORDER BY, // so also add it to the target list of the new upper Query Var *new_var = gpdb::MakeVar( 1, lower_target_entry->resno, gpdb::ExprType((Node *) target_entry->expr), gpdb::ExprTypeMod((Node *) target_entry->expr), 0 // query levels up ); TargetEntry *upper_target_entry = gpdb::MakeTargetEntry( (Expr *) new_var, ulResNoNew, target_entry->resname, target_entry->resjunk); if (is_sorting_col) { // This target list entry is referenced in the ORDER BY as well, evaluated in the upper // query. 
Set the ressortgroupref, keeping the same number as in the original query. upper_target_entry->ressortgroupref = lower_target_entry->ressortgroupref; } // Set target list entry of the derived table to be non-resjunked, since we need it in the upper lower_target_entry->resjunk = false; upper_query->targetList = gpdb::LAppend(upper_query->targetList, upper_target_entry); } } else { // push any window functions in the target entry into the lower target list // and also add any needed vars to the lower target list target_entry->resno = ulResNoNew; TargetEntry *upper_target_entry = (TargetEntry *) gpdb::MutateExpressionTree( (Node *) target_entry, (MutatorWalkerFn) RunWindowProjListMutator, &projlist_context); upper_query->targetList = gpdb::LAppend(upper_query->targetList, upper_target_entry); } } // once we finish the above loop, the context has accumulated all the needed vars, // window spec expressions and window functions for the lower targer list lower_query->targetList = projlist_context.m_lower_table_tlist; GPOS_ASSERT(gpdb::ListLength(upper_query->targetList) <= gpdb::ListLength(original_query->targetList)); ReassignSortClause(upper_query, lower_query); return upper_query; } //--------------------------------------------------------------------------- // @function: // CQueryMutators::RunWindowProjListMutator // // @doc: // Traverse the project list of extract all window functions in an // arbitrarily complex project element //--------------------------------------------------------------------------- Node * CQueryMutators::RunWindowProjListMutator(Node *node, SContextGrpbyPlMutator *context) { if (nullptr == node) { return nullptr; } const ULONG resno = gpdb::ListLength(context->m_lower_table_tlist) + 1; if (IsA(node, WindowFunc) && 0 == context->m_current_query_level) { // This is a window function that needs to be executed in the lower Query. 
// Insert window function as a new TargetEntry into the lower target list // (requires incrementing varlevelsup on its arguments). // Create a var that refers to the newly created lower TargetEntry and return // that, to be used instead of the window function in the upper TargetEntry. // make a copy of the tree and increment varlevelsup, using a different mutator SContextIncLevelsupMutator levelsUpContext( context->m_current_query_level, true /* should_fix_top_level_target_list */); WindowFunc *window_func = (WindowFunc *) gpdb::MutateExpressionTree( node, (MutatorWalkerFn) RunIncrLevelsUpMutator, &levelsUpContext); GPOS_ASSERT(IsA(window_func, WindowFunc)); // get the function name and create a new target entry for window_func CMDIdGPDB *mdid_func = GPOS_NEW(context->m_mp) CMDIdGPDB(IMDId::EmdidGeneral, window_func->winfnoid); const CWStringConst *str = CMDAccessorUtils::PstrWindowFuncName(context->m_mda, mdid_func); mdid_func->Release(); TargetEntry *target_entry = gpdb::MakeTargetEntry( (Expr *) window_func, (AttrNumber) resno, CTranslatorUtils::CreateMultiByteCharStringFromWCString( str->GetBuffer()), false /* resjunk */ ); context->m_lower_table_tlist = gpdb::LAppend(context->m_lower_table_tlist, target_entry); // return a variable referring to the lower table's corresponding target entry, // to be used somewhere in the upper query's target list Var *new_var = gpdb::MakeVar( 1, // derived query which is now the only table in FROM expression (AttrNumber) resno, gpdb::ExprType(node), gpdb::ExprTypeMod(node), 0 // query levelsup ); return (Node *) new_var; } if (IsA(node, Var) && ((Var *) node)->varlevelsup == context->m_current_query_level) { // This is a Var referencing the original query scope. It now needs to reference // the new upper query scope. // Since the rtable of the upper Query is different from that of the original // Query, calculate the new varno (always 1) and varattno to use. 
Var *var = (Var *) gpdb::CopyObject(node); // Set varlevelsup to 0 temporarily while searching in the target list var->varlevelsup = 0; TargetEntry *found_tle = gpdb::FindFirstMatchingMemberInTargetList( (Node *) var, context->m_lower_table_tlist); if (nullptr == found_tle) { // this var is not yet provided by the lower target list, so // create a new TargetEntry for it Node *var_copy = (Node *) gpdb::CopyObject(var); return (Node *) MakeVarInDerivedTable(var_copy, context); } var->varno = 1; // derived query is the only table in FROM expression var->varattno = found_tle->resno; var->varlevelsup = context->m_current_query_level; // reset varlevels up found_tle->resjunk = false; return (Node *) var; } if (IsA(node, Query)) { // recurse into Query nodes context->m_current_query_level++; Query *result = gpdb::MutateQueryTree( (Query *) node, (MutatorWalkerFn) RunWindowProjListMutator, context, 0); context->m_current_query_level--; return (Node *) result; } return gpdb::MutateExpressionTree( node, (MutatorWalkerFn) CQueryMutators::RunWindowProjListMutator, context); } //--------------------------------------------------------------------------- // @function: // CQueryMutators::ReassignSortClause // // @doc: // Reassign the sorting clause from the derived table to the new top-level query //--------------------------------------------------------------------------- void CQueryMutators::ReassignSortClause(Query *top_level_query, Query *derived_table_query) { top_level_query->sortClause = derived_table_query->sortClause; top_level_query->limitOffset = derived_table_query->limitOffset; top_level_query->limitCount = derived_table_query->limitCount; derived_table_query->sortClause = nullptr; derived_table_query->limitOffset = nullptr; derived_table_query->limitCount = nullptr; } // EOF