//--------------------------------------------------------------------------- // Greenplum Database // Copyright (C) 2011 EMC Greenplum, Inc. // // @filename: // COptTasks.cpp // // @doc: // Routines to perform optimization related tasks using the gpos framework // // @test: // // //--------------------------------------------------------------------------- #include "gpopt/utils/COptTasks.h" extern "C" { /* cdb/cdbvars.h not in PG18 */ /* optimizer/hints.h not in PG18 */ /* optimizer/orca.h not in PG18 — ORCA is now an extension */ #include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/gpdbgucs.h" /* PG18 compat: GPDB session/command counters don't exist — use 0 */ #ifndef gp_session_id #define gp_session_id 0 #endif #ifndef gp_command_count #define gp_command_count 0 #endif #ifndef PG_FUNCNAME_MACRO #define PG_FUNCNAME_MACRO __func__ #endif } #include "gpos/_api.h" #include "gpos/base.h" #include "gpos/common/CAutoP.h" #include "gpos/error/CException.h" #include "gpos/io/COstreamString.h" #include "gpos/memory/CAutoMemoryPool.h" #include "gpos/memory/set.h" #include "gpos/task/CAutoTraceFlag.h" #include "gpdbcost/CCostModelGPDB.h" #include "gpdbcost/CCostModelPG.h" #include "compat/utils/gpdbgucs.h" // Mirror the enum values from pg_orca.cpp; keep in sync. #define PG_ORCA_COST_MODEL_GPDB 0 #define PG_ORCA_COST_MODEL_PG 1 #include "gpopt/base/CAutoOptCtxt.h" #include "gpopt/config/CConfigParamMapping.h" #include "gpopt/engine/CCTEConfig.h" #include "gpopt/engine/CEnumeratorConfig.h" #include "gpopt/engine/CHint.h" #include "gpopt/engine/CStatisticsConfig.h" #include "gpopt/eval/CConstExprEvaluatorDXL.h" #include "gpopt/exception.h" #include "gpopt/gpdbwrappers.h" #include "gpopt/hints/CPlanHint.h" #include "gpopt/mdcache/CAutoMDAccessor.h" #include "gpopt/mdcache/CMDCache.h" #include "gpopt/minidump/CMinidumperUtils.h" #include "gpopt/optimizer/COptimizer.h" #include "gpopt/optimizer/COptimizerConfig.h" #include "gpopt/relcache/CMDProviderRelcache.h" #include "gpopt/translate/CContextDXLToPlStmt.h" #include "gpopt/translate/CTranslatorDXLToExpr.h" #include "gpopt/translate/CTranslatorDXLToPlStmt.h" #include "gpopt/translate/CTranslatorExprToDXL.h" #include "gpopt/translate/CTranslatorQueryToDXL.h" #include "gpopt/translate/CTranslatorRelcacheToDXL.h" #include "gpopt/translate/CTranslatorUtils.h" #include "gpopt/utils/CConstExprEvaluatorProxy.h" #include "gpopt/utils/gpdbdefs.h" #include "gpopt/xforms/CXformFactory.h" #include "naucrates/base/CQueryToDXLResult.h" #include "naucrates/dxl/CDXLUtils.h" #include "naucrates/dxl/CIdGenerator.h" #include "naucrates/dxl/operators/CDXLNode.h" #include "naucrates/dxl/parser/CParseHandlerDXL.h" #include "naucrates/exception.h" #include "naucrates/init.h" #include "naucrates/md/CMDIdCast.h" #include "naucrates/md/CMDIdRelStats.h" #include "naucrates/md/CMDIdScCmp.h" #include "naucrates/md/CSystemId.h" #include "naucrates/md/IMDId.h" #include "naucrates/md/IMDRelStats.h" #include "naucrates/traceflags/traceflags.h" using namespace gpos; using namespace gpopt; using namespace gpdxl; using namespace gpdbcost; // size of error buffer #define GPOPT_ERROR_BUFFER_SIZE 10 * 1024 * 1024 // definition of default AutoMemoryPool #define AUTO_MEM_POOL(amp) CAutoMemoryPool amp(CAutoMemoryPool::ElcExc) // default id for the source system const CSystemId default_sysid(IMDId::EmdidGeneral, GPOS_WSZ_STR_LENGTH("GPDB")); /* plan_hint_hook_type is GPDB-only */ // Check one-to-one mapping of row hint types /* RVT_* / plan_hint_hook are GPDB-only — assertions skipped in PG18 */ //--------------------------------------------------------------------------- // @function: // SOptContext::SOptContext // // @doc: // Ctor // //--------------------------------------------------------------------------- SOptContext::SOptContext() = default; //--------------------------------------------------------------------------- // @function: // SOptContext::Free // // @doc: // Free all members except those pointed to by either input or // output // //--------------------------------------------------------------------------- void SOptContext::Free(SOptContext::EPin input, SOptContext::EPin output) const { if (nullptr != m_query_dxl && epinQueryDXL != input && epinQueryDXL != output) { gpdb::GPDBFree(m_query_dxl); } if (nullptr != m_query && epinQuery != input && epinQuery != output) { gpdb::GPDBFree(m_query); } if (nullptr != m_plan_dxl && epinPlanDXL != input && epinPlanDXL != output) { gpdb::GPDBFree(m_plan_dxl); } if (nullptr != m_plan_stmt && epinPlStmt != input && epinPlStmt != output) { gpdb::GPDBFree(m_plan_stmt); } if (nullptr != m_error_msg && epinErrorMsg != input && epinErrorMsg != output) { gpdb::GPDBFree(m_error_msg); } } //--------------------------------------------------------------------------- // @function: // SOptContext::CloneErrorMsg // // @doc: // Clone m_error_msg to given memory context. Return NULL if there is no // error message. // //--------------------------------------------------------------------------- CHAR * SOptContext::CloneErrorMsg(MemoryContext context, BOOL *clone_failed) const { *clone_failed = false; if (nullptr == context || nullptr == m_error_msg) { return nullptr; } CHAR *error_msg; GPOS_TRY { error_msg = gpdb::MemCtxtStrdup(context, m_error_msg); } GPOS_CATCH_EX(ex) { error_msg = nullptr; *clone_failed = true; } GPOS_CATCH_END; return error_msg; } //--------------------------------------------------------------------------- // @function: // SOptContext::Cast // // @doc: // Casting function // //--------------------------------------------------------------------------- SOptContext * SOptContext::Cast(void *ptr) { GPOS_ASSERT(nullptr != ptr); return reinterpret_cast(ptr); } //--------------------------------------------------------------------------- // @function: // CreateMultiByteCharStringFromWCString // // @doc: // Return regular string from wide-character string // //--------------------------------------------------------------------------- CHAR * COptTasks::CreateMultiByteCharStringFromWCString(const WCHAR *wcstr) { GPOS_ASSERT(nullptr != wcstr); const ULONG input_len = GPOS_WSZ_LENGTH(wcstr); const ULONG wchar_size = GPOS_SIZEOF(WCHAR); const ULONG max_len = (input_len + 1) * wchar_size; CHAR *str = (CHAR *) gpdb::GPDBAlloc(max_len); gpos::clib::Wcstombs(str, const_cast(wcstr), max_len); str[max_len - 1] = '\0'; return str; } //--------------------------------------------------------------------------- // @function: // COptTasks::Execute // // @doc: // Execute a task using GPOS. TODO extend gpos to provide // this functionality // //--------------------------------------------------------------------------- void COptTasks::Execute(void *(*func)(void *), void *func_arg) { Assert(func); CHAR *err_buf = (CHAR *) palloc(GPOPT_ERROR_BUFFER_SIZE); err_buf[0] = '\0'; // initialize DXL support InitDXL(); bool abort_flag = false; CAutoMemoryPool amp(CAutoMemoryPool::ElcNone); gpos_exec_params params; params.func = func; params.arg = func_arg; params.stack_start = ¶ms; params.error_buffer = err_buf; params.error_buffer_size = GPOPT_ERROR_BUFFER_SIZE; params.abort_requested = &abort_flag; // execute task and send log message to server log GPOS_TRY { (void) gpos_exec(¶ms); } GPOS_CATCH_EX(ex) { LogExceptionMessageAndDelete(err_buf); GPOS_RETHROW(ex); } GPOS_CATCH_END; LogExceptionMessageAndDelete(err_buf); } void COptTasks::LogExceptionMessageAndDelete(CHAR *err_buf) { if ('\0' != err_buf[0]) { elog(LOG, "%s", CreateMultiByteCharStringFromWCString((WCHAR *) err_buf)); } pfree(err_buf); } //--------------------------------------------------------------------------- // @function: // COptTasks::ConvertToPlanStmtFromDXL // // @doc: // Translate a DXL tree into a planned statement // //--------------------------------------------------------------------------- PlannedStmt * COptTasks::ConvertToPlanStmtFromDXL( CMemoryPool *mp, CMDAccessor *md_accessor, const Query *orig_query, const CDXLNode *dxlnode, bool can_set_tag, DistributionHashOpsKind distribution_hashops) { GPOS_ASSERT(nullptr != md_accessor); GPOS_ASSERT(nullptr != dxlnode); /* * Since GPDB 7 (commit 0ae9004), plan node IDs start from 0 in GPDB. * GPDB 6 and lower had plan node IDs starting from 0. */ CIdGenerator plan_id_generator(0 /* ulStartId */); CIdGenerator motion_id_generator(1 /* ulStartId */); CIdGenerator param_id_generator(0 /* ulStartId */); CContextDXLToPlStmt dxl_to_plan_stmt_ctxt( mp, &plan_id_generator, &motion_id_generator, ¶m_id_generator, distribution_hashops); // translate DXL -> PlannedStmt CTranslatorDXLToPlStmt dxl_to_plan_stmt_translator( mp, md_accessor, &dxl_to_plan_stmt_ctxt, gpdb::GetGPSegmentCount()); return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL( dxlnode, orig_query, can_set_tag); } //--------------------------------------------------------------------------- // @function: // COptTasks::LoadSearchStrategy // // @doc: // Load search strategy from given file // //--------------------------------------------------------------------------- CSearchStageArray * COptTasks::LoadSearchStrategy(CMemoryPool *mp, char *path) { CSearchStageArray *search_strategy_arr = nullptr; CParseHandlerDXL *dxl_parse_handler = nullptr; GPOS_TRY { if (nullptr != path && strlen(path) != 0) { dxl_parse_handler = CDXLUtils::GetParseHandlerForDXLFile(mp, path, nullptr); if (nullptr != dxl_parse_handler) { elog(DEBUG2, "\n[OPT]: Using search strategy in (%s)", path); search_strategy_arr = dxl_parse_handler->GetSearchStageArray(); search_strategy_arr->AddRef(); } } } GPOS_CATCH_EX(ex) { if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError)) { GPOS_RETHROW(ex); } elog(DEBUG2, "\n[OPT]: Using default search strategy"); GPOS_RESET_EX; } GPOS_CATCH_END; if (dxl_parse_handler) GPOS_DELETE(dxl_parse_handler); return search_strategy_arr; } //--------------------------------------------------------------------------- // @function: // COptTasks::CreateOptimizerConfig // // @doc: // Create the optimizer configuration // //--------------------------------------------------------------------------- COptimizerConfig * COptTasks::CreateOptimizerConfig(CMemoryPool *mp, ICostModel *cost_model, CPlanHint *plan_hints) { // get chosen plan number, cost threshold ULLONG plan_id = 0; /* optimizer_plan_id is GPDB-only */ ULLONG num_samples = (ULLONG) 1000 /* optimizer_samples_number */; DOUBLE cost_threshold = (DOUBLE) 0.0 /* optimizer_cost_threshold */; DOUBLE damping_factor_filter = (DOUBLE) optimizer_damping_factor_filter; DOUBLE damping_factor_join = (DOUBLE) optimizer_damping_factor_join; DOUBLE damping_factor_groupby = (DOUBLE) optimizer_damping_factor_groupby; /* GPDB-only GUCs — use fixed defaults for PG18 single-node mode */ ULONG cte_inlining_cutoff = (ULONG) optimizer_cte_inlining_bound; ULONG join_arity_for_associativity_commutativity = (ULONG) 10; /* optimizer_join_arity_for_associativity_commutativity: match join_order_threshold to avoid exponential search for large joins */ ULONG array_expansion_threshold = (ULONG) 100; /* optimizer_array_expansion_threshold */ ULONG join_order_threshold = (ULONG) 10; /* optimizer_join_order_threshold */ ULONG broadcast_threshold = (ULONG) 100000; /* optimizer_penalize_broadcast_threshold */ ULONG push_group_by_below_setop_threshold = (ULONG) 10; /* optimizer_push_group_by_below_setop_threshold */ ULONG xform_bind_threshold = (ULONG) 0; /* optimizer_xform_bind_threshold */ ULONG skew_factor = (ULONG) 0; /* optimizer_skew_factor */ return GPOS_NEW(mp) COptimizerConfig( GPOS_NEW(mp) CEnumeratorConfig(mp, plan_id, num_samples, cost_threshold), GPOS_NEW(mp) CStatisticsConfig(mp, damping_factor_filter, damping_factor_join, damping_factor_groupby, MAX_STATS_BUCKETS), GPOS_NEW(mp) CCTEConfig(cte_inlining_cutoff), cost_model, GPOS_NEW(mp) CHint(join_arity_for_associativity_commutativity, array_expansion_threshold, join_order_threshold, broadcast_threshold, false, /* don't create Assert nodes for constraints, we'll * enforce them ourselves in the executor */ push_group_by_below_setop_threshold, xform_bind_threshold, skew_factor), plan_hints, GPOS_NEW(mp) CWindowOids(mp, OID(F_ROW_NUMBER), OID(F_RANK_), OID(F_DENSE_RANK_))); } //--------------------------------------------------------------------------- // @function: // COptTasks::SetCostModelParams // // @doc: // Set cost model parameters // //--------------------------------------------------------------------------- void COptTasks::SetCostModelParams(ICostModel *cost_model) { GPOS_ASSERT(nullptr != cost_model); // The tunables below address GPDB-cost-model param IDs (EcpSortTupWidthCostUnit // etc.); they do not exist in CCostModelParamsPG. Skip when running the // PG-aligned model. if (ICostModel::EcmtGPDBCalibrated != cost_model->Ecmt()) { return; } { /* optimizer_nestloop_factor is GPDB-only; skip NLJ factor adjustment */ } if (optimizer_sort_factor > 1.0 || optimizer_sort_factor < 1.0) { // change sort cost factor ICostModelParams::SCostParam *cost_param = cost_model->GetCostModelParams()->PcpLookup( CCostModelParamsGPDB::EcpSortTupWidthCostUnit); CDouble sort_factor(optimizer_sort_factor); cost_model->GetCostModelParams()->SetParam( cost_param->Id(), cost_param->Get() * optimizer_sort_factor, cost_param->GetLowerBoundVal() * optimizer_sort_factor, cost_param->GetUpperBoundVal() * optimizer_sort_factor); } if (optimizer_spilling_mem_threshold > 0.0) { ICostModelParams::SCostParam *cost_param = cost_model->GetCostModelParams()->PcpLookup( CCostModelParamsGPDB::EcpHJSpillingMemThreshold); CDouble spill_mem_threshold(optimizer_spilling_mem_threshold); cost_model->GetCostModelParams()->SetParam( cost_param->Id(), spill_mem_threshold, spill_mem_threshold - 0.0, spill_mem_threshold + 0.0); } { ICostModelParams::SCostParam *cost_param = cost_model->GetCostModelParams()->PcpLookup( CCostModelParamsGPDB::EcpIndexJoinAllowedRiskThreshold); CDouble threshold(optimizer_index_join_allowed_risk_threshold); cost_model->GetCostModelParams()->SetParam( cost_param->Id(), threshold, 0.0, threshold + 0.0); } } //--------------------------------------------------------------------------- // @function: // COptTasks::GetCostModel // // @doc: // Generate an instance of optimizer cost model // //--------------------------------------------------------------------------- ICostModel * COptTasks::GetCostModel(CMemoryPool *mp, ULONG num_segments) { ICostModel *cost_model = nullptr; if (PG_ORCA_COST_MODEL_PG == pg_orca_cost_model) { cost_model = GPOS_NEW(mp) CCostModelPG(mp, num_segments); } else { cost_model = GPOS_NEW(mp) CCostModelGPDB(mp, num_segments); } SetCostModelParams(cost_model); return cost_model; } #if 0 /* pg_hint_plan types not available in PG18 -- stub below */ //--------------------------------------------------------------------------- // @function: // GenerateJoinNodes // // @doc: // Converts OuterInnerRels into a JoinNode structure //--------------------------------------------------------------------------- CJoinHint::JoinNode * GenerateJoinNodes(CMemoryPool *mp, OuterInnerRels *outer_inner, gpos::set &aliases) { if (nullptr == outer_inner) { return nullptr; } CJoinHint::JoinNode *pair = nullptr; if (outer_inner->relation != nullptr) { // Base case: processing outer_inner leaf node char *str_buffer = GPOS_NEW_ARRAY(mp, char, strlen(outer_inner->relation) + 1); memcpy(str_buffer, outer_inner->relation, strlen(outer_inner->relation)); str_buffer[strlen(outer_inner->relation)] = '\0'; if (aliases.count(std::string(str_buffer)) > 0) { // invalid leading hint if alias is specified more than once return nullptr; } aliases.insert(std::string(str_buffer)); CWStringConst *alias = GPOS_NEW(mp) CWStringConst(mp, str_buffer /*outer_inner->relation*/); GPOS_DELETE_ARRAY(str_buffer); pair = GPOS_NEW(mp) CJoinHint::JoinNode(alias); } else if (2 == list_length(outer_inner->outer_inner_pair)) { // Recursive case: processing outer_inner branch node CJoinHint::JoinNode *left_joinnode = GenerateJoinNodes( mp, (OuterInnerRels *) lfirst(list_head(outer_inner->outer_inner_pair)), aliases); CJoinHint::JoinNode *right_joinnode = GenerateJoinNodes( mp, (OuterInnerRels *) lfirst( list_second_cell(outer_inner->outer_inner_pair)), aliases); if (nullptr == left_joinnode || nullptr == right_joinnode) { // bad input - outer_inner may be malformed CRefCount::SafeRelease(left_joinnode); CRefCount::SafeRelease(right_joinnode); return nullptr; } pair = GPOS_NEW(mp) CJoinHint::JoinNode(left_joinnode, right_joinnode, /*is_directed*/ true); } else { // bad user input // // Example: "Leading(((t1 t2)))" } return pair; } //--------------------------------------------------------------------------- // @function: // GenerateJoinNodes // // @doc: // Converts List of relation names into a JoinNode structure // // Example: // List: [t1 t2 t3 t4] // // JoinNode: . // x . // / \ . // x t4 . // / \ . // x t3 . // / \ . // t2 t1 //--------------------------------------------------------------------------- CJoinHint::JoinNode * GenerateJoinNodes(CMemoryPool *mp, List *relations) { CJoinHint::JoinNode *pair = nullptr; gpos::set aliases(mp); ListCell *l; int count = 0; foreach_with_count(l, relations, count) { char *relation = (char *) lfirst(l); if (aliases.count(std::string(relation)) > 0) { pair->Release(); // invalid leading hint if alias is specified more than once return nullptr; } aliases.insert(std::string(relation)); CWStringConst *alias = GPOS_NEW(mp) CWStringConst(mp, relation); if (count == 0) { // As we traverse the input list relations, we build up the // JoinNode pair. The first element is an edge case because there // is no existing JoinNode pair yet. pair = GPOS_NEW(mp) CJoinHint::JoinNode(alias); } else { pair = GPOS_NEW(mp) CJoinHint::JoinNode( pair, GPOS_NEW(mp) CJoinHint::JoinNode(alias), /*is_directed*/ false); } } return pair; } //--------------------------------------------------------------------------- // @function: // COptTasks::GetPlanHints // // @doc: // Generate an instance of plan hints by parsing the query tree. // //--------------------------------------------------------------------------- CPlanHint * COptTasks::GetPlanHints(CMemoryPool *mp, Query *query) { HintState *hintstate = nullptr; if (plan_hint_hook != nullptr) { // Calling plan_hint_hook creates pg_hint_plan hint structures // (see optimizer/hints.h). hintstate = (HintState *) plan_hint_hook(query); if (hintstate != nullptr && hintstate->log_level > 0) { GPOS_SET_TRACE(EopttracePrintPgHintPlanLog); } } if (nullptr == hintstate) { return nullptr; } // Following code translates pg_hint_plan hint structures into ORCA hint // structures (see gpopt/hints/CPlanHint.h). CPlanHint *plan_hints = GPOS_NEW(mp) CPlanHint(mp); // Translate ScanMethodHint => CScanHint for (int ul = 0; ul < hintstate->num_hints[HINT_TYPE_SCAN_METHOD]; ul++) { ScanMethodHint *scan_hint = (ScanMethodHint *) hintstate->scan_hints[ul]; if (nullptr == scan_hint->relname) { continue; } CScanHint::EType type = CScanHint::Sentinal; switch (scan_hint->base.hint_keyword) { case HINT_KEYWORD_SEQSCAN: { type = CScanHint::SeqScan; break; } case HINT_KEYWORD_NOSEQSCAN: { type = CScanHint::NoSeqScan; break; } case HINT_KEYWORD_INDEXSCAN: { type = CScanHint::IndexScan; break; } case HINT_KEYWORD_NOINDEXSCAN: { type = CScanHint::NoIndexScan; break; } case HINT_KEYWORD_INDEXONLYSCAN: { type = CScanHint::IndexOnlyScan; break; } case HINT_KEYWORD_NOINDEXONLYSCAN: { type = CScanHint::NoIndexOnlyScan; break; } case HINT_KEYWORD_BITMAPSCAN: { type = CScanHint::BitmapScan; break; } case HINT_KEYWORD_NOBITMAPSCAN: { type = CScanHint::NoBitmapScan; break; } default: { CWStringDynamic *error_message = GPOS_NEW(mp) CWStringDynamic( mp, GPOS_WSZ_LIT("Unsupported plan hint: ")); error_message->AppendFormat(GPOS_WSZ_LIT("%s"), scan_hint->base.keyword); GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiUnsupportedOp, error_message->GetBuffer()); break; } } CScanHint *hint = plan_hints->GetScanHint(scan_hint->relname); if (nullptr == hint) { StringPtrArray *indexnames = GPOS_NEW(mp) StringPtrArray(mp); ListCell *l; foreach (l, scan_hint->indexnames) { char *indexname = (char *) lfirst(l); indexnames->Append(GPOS_NEW(mp) CWStringConst(mp, indexname)); } hint = GPOS_NEW(mp) CScanHint( mp, GPOS_NEW(mp) CWStringConst(mp, scan_hint->relname), indexnames); plan_hints->AddHint(hint); } hint->AddType(type); } // Translate RowsHint => CRowHint for (int hint_index = 0; hint_index < hintstate->num_hints[HINT_TYPE_ROWS]; hint_index++) { RowsHint *row_hint = (RowsHint *) hintstate->rows_hints[hint_index]; StringPtrArray *aliases = GPOS_NEW(mp) StringPtrArray(mp); for (int rel_index = 0; rel_index < row_hint->nrels; rel_index++) { aliases->Append( GPOS_NEW(mp) CWStringConst(mp, row_hint->relnames[rel_index])); } plan_hints->AddHint(GPOS_NEW(mp) CRowHint( mp, aliases, CDouble(row_hint->rows), (CRowHint::RowsValueType) row_hint->value_type)); } // Translate LeadingHint => CJoinHint for (int hint_index = 0; hint_index < hintstate->num_hints[HINT_TYPE_LEADING]; hint_index++) { LeadingHint *leading_hint = (LeadingHint *) hintstate->leading_hint[hint_index]; CJoinHint::JoinNode *joinnode = nullptr; if (nullptr != leading_hint->outer_inner) { // is directed gpos::set aliases(mp); joinnode = GenerateJoinNodes(mp, leading_hint->outer_inner, aliases); if (nullptr != joinnode) { plan_hints->AddHint(GPOS_NEW(mp) CJoinHint(mp, joinnode)); } } else if (nullptr != leading_hint->relations) { // is directed-less joinnode = GenerateJoinNodes(mp, leading_hint->relations); if (nullptr != joinnode) { plan_hints->AddHint(GPOS_NEW(mp) CJoinHint(mp, joinnode)); } } } for (int hint_index = 0; hint_index < hintstate->num_hints[HINT_TYPE_JOIN_METHOD]; hint_index++) { JoinMethodHint *joinmethod_hint = (JoinMethodHint *) hintstate->join_hints[hint_index]; StringPtrArray *aliasnames = GPOS_NEW(mp) StringPtrArray(mp); for (int relname_index = 0; relname_index < joinmethod_hint->nrels; relname_index++) { aliasnames->Append(GPOS_NEW(mp) CWStringConst( mp, joinmethod_hint->relnames[relname_index])); } CJoinTypeHint::JoinType type = CJoinTypeHint::SENTINEL; switch (joinmethod_hint->base.hint_keyword) { case HINT_KEYWORD_NESTLOOP: { type = CJoinTypeHint::HINT_KEYWORD_NESTLOOP; break; } case HINT_KEYWORD_MERGEJOIN: { type = CJoinTypeHint::HINT_KEYWORD_MERGEJOIN; break; } case HINT_KEYWORD_HASHJOIN: { type = CJoinTypeHint::HINT_KEYWORD_HASHJOIN; break; } case HINT_KEYWORD_NONESTLOOP: { type = CJoinTypeHint::HINT_KEYWORD_NONESTLOOP; break; } case HINT_KEYWORD_NOMERGEJOIN: { type = CJoinTypeHint::HINT_KEYWORD_NOMERGEJOIN; break; } case HINT_KEYWORD_NOHASHJOIN: { type = CJoinTypeHint::HINT_KEYWORD_NOHASHJOIN; break; } default: { CWStringDynamic *error_message = GPOS_NEW(mp) CWStringDynamic( mp, GPOS_WSZ_LIT("Unsupported plan hint: ")); error_message->AppendFormat(GPOS_WSZ_LIT("%s"), joinmethod_hint->base.keyword); GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiUnsupportedOp, error_message->GetBuffer()); break; } } CJoinTypeHint *hint = GPOS_NEW(mp) CJoinTypeHint(mp, type, aliasnames); plan_hints->AddHint(hint); } return plan_hints; } #endif /* pg_hint_plan stub */ // Stub: no hint-plan support in PG18 mode CPlanHint * COptTasks::GetPlanHints(CMemoryPool *mp, Query * /*query*/) { (void) mp; return nullptr; } //--------------------------------------------------------------------------- // @function: // COptTasks::OptimizeTask // // @doc: // task that does the optimizes query to physical DXL // //--------------------------------------------------------------------------- void * COptTasks::OptimizeTask(void *ptr) { GPOS_ASSERT(nullptr != ptr); SOptContext *opt_ctxt = SOptContext::Cast(ptr); GPOS_ASSERT(nullptr != opt_ctxt->m_query); GPOS_ASSERT(nullptr == opt_ctxt->m_plan_dxl); GPOS_ASSERT(nullptr == opt_ctxt->m_plan_stmt); AUTO_MEM_POOL(amp); CMemoryPool *mp = amp.Pmp(); // Does the metadatacache need to be reset? // // On the first call, before the cache has been initialized, we // don't care about the return value of MDCacheNeedsReset(). But // we need to call it anyway, to give it a chance to initialize // the invalidation mechanism. bool reset_mdcache = gpdb::MDCacheNeedsReset(); // initialize metadata cache, or purge if needed, or change size if requested if (!CMDCache::FInitialized()) { CMDCache::Init(); CMDCache::SetCacheQuota(optimizer_mdcache_size * 1024L); } else if (reset_mdcache) { CMDCache::Reset(); CMDCache::SetCacheQuota(optimizer_mdcache_size * 1024L); } else if (CMDCache::ULLGetCacheQuota() != (ULLONG) optimizer_mdcache_size * 1024L) { CMDCache::SetCacheQuota(optimizer_mdcache_size * 1024L); } // load search strategy CSearchStageArray *search_strategy_arr = LoadSearchStrategy(mp, optimizer_search_strategy_path); CBitSet *trace_flags = nullptr; CBitSet *enabled_trace_flags = nullptr; CBitSet *disabled_trace_flags = nullptr; CDXLNode *plan_dxl = nullptr; IMdIdArray *col_stats = nullptr; MdidHashSet *rel_stats = nullptr; GPOS_TRY { // set trace flags trace_flags = CConfigParamMapping::PackConfigParamInBitset( mp, CXform::ExfSentinel); SetTraceflags(mp, trace_flags, &enabled_trace_flags, &disabled_trace_flags); // set up relcache MD provider CMDProviderRelcache *relcache_provider = GPOS_NEW(mp) CMDProviderRelcache(); { // scope for MD accessor CMDAccessor mda(mp, CMDCache::Pcache(), default_sysid, relcache_provider); ULONG num_segments = gpdb::GetGPSegmentCount(); ULONG num_segments_for_costing = optimizer_segments; if (0 == num_segments_for_costing) { num_segments_for_costing = num_segments; } CAutoP query_to_dxl_translator; query_to_dxl_translator = CTranslatorQueryToDXL::QueryToDXLInstance( mp, &mda, (Query *) opt_ctxt->m_query); ICostModel *cost_model = GetCostModel(mp, num_segments_for_costing); CPlanHint *plan_hints = GetPlanHints(mp, opt_ctxt->m_query); COptimizerConfig *optimizer_config = CreateOptimizerConfig(mp, cost_model, plan_hints); CConstExprEvaluatorProxy expr_eval_proxy(mp, &mda); IConstExprEvaluator *expr_evaluator = GPOS_NEW(mp) CConstExprEvaluatorDXL(mp, &mda, &expr_eval_proxy); CDXLNode *query_dxl = query_to_dxl_translator->TranslateQueryToDXL(); CDXLNodeArray *query_output_dxlnode_array = query_to_dxl_translator->GetQueryOutputCols(); CDXLNodeArray *cte_dxlnode_array = query_to_dxl_translator->GetCTEs(); GPOS_ASSERT(nullptr != query_output_dxlnode_array); // pg_orca runs on single-node PostgreSQL — always disable motions // (Motion nodes use T_Invalid as their node tag and would crash the // PG18 executor if they ever appeared in the plan tree). BOOL use_legacy_opfamilies = (query_to_dxl_translator->GetDistributionHashOpsKind() == DistrUseLegacyHashOps); CAutoTraceFlag atf1(EopttraceDisableMotions, true /*is_master_only*/); CAutoTraceFlag atf2(EopttraceUseLegacyOpfamilies, use_legacy_opfamilies); plan_dxl = COptimizer::PdxlnOptimize( mp, &mda, query_dxl, query_output_dxlnode_array, cte_dxlnode_array, expr_evaluator, num_segments, gp_session_id, gp_command_count, search_strategy_arr, optimizer_config); if (opt_ctxt->m_should_serialize_plan_dxl) { // serialize DXL to xml CWStringDynamic plan_str(mp); COstreamString oss(&plan_str); CDXLUtils::SerializePlan( mp, oss, plan_dxl, optimizer_config->GetEnumeratorCfg()->GetPlanId(), optimizer_config->GetEnumeratorCfg()->GetPlanSpaceSize(), true /*serialize_header_footer*/, true /*indentation*/); opt_ctxt->m_plan_dxl = CreateMultiByteCharStringFromWCString(plan_str.GetBuffer()); } // translate DXL->PlStmt only when needed if (opt_ctxt->m_should_generate_plan_stmt) { // always use opt_ctxt->m_query->can_set_tag as the query_to_dxl_translator->Pquery() is a mutated Query object // that may not have the correct can_set_tag opt_ctxt->m_plan_stmt = (PlannedStmt *) gpdb::CopyObject(ConvertToPlanStmtFromDXL( mp, &mda, opt_ctxt->m_query, plan_dxl, opt_ctxt->m_query->canSetTag, query_to_dxl_translator->GetDistributionHashOpsKind())); } CStatisticsConfig *stats_conf = optimizer_config->GetStatsConf(); col_stats = GPOS_NEW(mp) IMdIdArray(mp); // CBDB_MERGE_FIXME: empty table after analyze still have no stats // cause CBDB can't tell empty or no stats stats_conf->CollectMissingStatsColumns(col_stats); rel_stats = GPOS_NEW(mp) MdidHashSet(mp); PrintMissingStatsWarning(mp, &mda, col_stats, rel_stats); rel_stats->Release(); col_stats->Release(); expr_evaluator->Release(); query_dxl->Release(); optimizer_config->Release(); plan_dxl->Release(); } } GPOS_CATCH_EX(ex) { ResetTraceflags(enabled_trace_flags, disabled_trace_flags); CRefCount::SafeRelease(rel_stats); CRefCount::SafeRelease(col_stats); CRefCount::SafeRelease(enabled_trace_flags); CRefCount::SafeRelease(disabled_trace_flags); CRefCount::SafeRelease(trace_flags); CRefCount::SafeRelease(plan_dxl); CMDCache::Shutdown(); IErrorContext *errctxt = CTask::Self()->GetErrCtxt(); opt_ctxt->m_is_unexpected_failure = IsLoggableFailure(ex); opt_ctxt->m_error_msg = CreateMultiByteCharStringFromWCString(errctxt->GetErrorMsg()); GPOS_RETHROW(ex); } GPOS_CATCH_END; // cleanup ResetTraceflags(enabled_trace_flags, disabled_trace_flags); CRefCount::SafeRelease(enabled_trace_flags); CRefCount::SafeRelease(disabled_trace_flags); CRefCount::SafeRelease(trace_flags); if (!optimizer_metadata_caching) { CMDCache::Shutdown(); } return nullptr; } //--------------------------------------------------------------------------- // @function: // COptTasks::PrintMissingStatsWarning // // @doc: // Print warning messages for columns with missing statistics // //--------------------------------------------------------------------------- void COptTasks::PrintMissingStatsWarning(CMemoryPool *mp, CMDAccessor *md_accessor, IMdIdArray *col_stats, MdidHashSet *rel_stats) { GPOS_ASSERT(nullptr != md_accessor); GPOS_ASSERT(nullptr != col_stats); GPOS_ASSERT(nullptr != rel_stats); CWStringDynamic wcstr(mp); COstreamString oss(&wcstr); const ULONG num_missing_col_stats = col_stats->Size(); for (ULONG ul = 0; ul < num_missing_col_stats; ul++) { IMDId *mdid = (*col_stats)[ul]; CMDIdColStats *mdid_col_stats = CMDIdColStats::CastMdid(mdid); IMDId *rel_mdid = mdid_col_stats->GetRelMdId(); const ULONG pos = mdid_col_stats->Position(); const IMDRelation *rel = md_accessor->RetrieveRel(rel_mdid); if (IMDRelation::ErelstorageForeign != rel->RetrieveRelStorageType()) { if (!rel_stats->Contains(rel_mdid)) { if (0 != ul) { oss << ", "; } rel_mdid->AddRef(); rel_stats->Insert(rel_mdid); oss << rel->Mdname().GetMDName()->GetBuffer(); } CMDName mdname = rel->GetMdCol(pos)->Mdname(); char msgbuf[NAMEDATALEN * 2 + 100]; snprintf(msgbuf, sizeof(msgbuf), "Missing statistics for column: %s.%s", CreateMultiByteCharStringFromWCString( rel->Mdname().GetMDName()->GetBuffer()), CreateMultiByteCharStringFromWCString( mdname.GetMDName()->GetBuffer())); GpdbEreport(ERRCODE_SUCCESSFUL_COMPLETION, LOG, msgbuf, nullptr); } } if (0 < rel_stats->Size()) { int length = NAMEDATALEN * (int) rel_stats->Size() + 200; char *msgbuf = (char *) palloc(length); snprintf( msgbuf, length, "One or more columns in the following table(s) do not have statistics: %s", CreateMultiByteCharStringFromWCString(wcstr.GetBuffer())); GpdbEreport( ERRCODE_SUCCESSFUL_COMPLETION, NOTICE, msgbuf, "For non-partitioned tables, run analyze ()." " For partitioned tables, run analyze rootpartition ()." " See log for columns missing statistics."); } } //--------------------------------------------------------------------------- // @function: // COptTasks::Optimize // // @doc: // optimizes a query to physical DXL // //--------------------------------------------------------------------------- char * COptTasks::Optimize(Query *query) { Assert(query); SOptContext gpopt_context; gpopt_context.m_query = query; gpopt_context.m_should_serialize_plan_dxl = true; Execute(&OptimizeTask, &gpopt_context); // clean up context gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlanDXL); return gpopt_context.m_plan_dxl; } //--------------------------------------------------------------------------- // @function: // COptTasks::GPOPTOptimizedPlan // // @doc: // optimizes a query to plannedstmt // //--------------------------------------------------------------------------- PlannedStmt * COptTasks::GPOPTOptimizedPlan(Query *query, SOptContext *gpopt_context) { Assert(query); Assert(gpopt_context); gpopt_context->m_query = query; gpopt_context->m_should_generate_plan_stmt = true; Execute(&OptimizeTask, gpopt_context); return gpopt_context->m_plan_stmt; } //--------------------------------------------------------------------------- // @function: // COptTasks::SetXform // // @doc: // Enable/Disable a given xform // //--------------------------------------------------------------------------- bool COptTasks::SetXform(char *xform_str, bool should_disable) { CXform *xform = CXformFactory::Pxff()->Pxf(xform_str); if (nullptr != xform) { optimizer_xforms[xform->Exfid()] = should_disable; return true; } return false; } // EOF