/*------------------------------------------------------------------------- * * pg_pathcheck.c * Walk every Path in a finished query tree and flag freed / garbage nodes. * * Debug aid only. When loaded, planner_shutdown_hook traverses the top * PlannerInfo and every reachable subroot (RelOptInfo->subroot, * PlannerGlobal->subroots), visiting pathlist, partial_pathlist, * cheapest_*_path, non_recursive_path, and every sub-Path field of every * compound Path type. Each visited Path is checked for a valid NodeTag; * a bogus tag (e.g. the 0x7F bytes from CLOBBER_FREED_MEMORY, or a node * allocated in a freed slot after pfree) is reported as a WARNING together * with the relation names resolved from the owning RelOptInfo's relids * and the full contents of the containing pathlist. * * planner_shutdown_hook is not handed the top PlannerInfo directly, so we * stash it from create_upper_paths_hook into PlannerGlobal->extension_state. * That slot lives in the planner's per-query context and is reclaimed on * both normal exit and elog(ERROR), so nothing to clean up ourselves. * *------------------------------------------------------------------------- */ #include "postgres.h" #include "fmgr.h" #include "miscadmin.h" #include "nodes/pathnodes.h" #include "optimizer/extendplan.h" #include "optimizer/paths.h" #include "optimizer/planner.h" #include "tcop/tcopprot.h" #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/memutils.h" #include "pathtags_generated.h" #define PPC_NAME "pg_pathcheck" #define PPC_VERSION "0.9.1" PG_MODULE_MAGIC_EXT( .name = PPC_NAME, .version = PPC_VERSION ); void _PG_init(void); /* Chained upstream hooks. */ static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; static planner_shutdown_hook_type prev_planner_shutdown_hook = NULL; static set_join_pathlist_hook_type prev_set_join_pathlist_hook = NULL; static set_rel_pathlist_hook_type prev_set_rel_pathlist_hook = NULL; /* Lazily resolved ID for our extension_state slot on PlannerGlobal. */ static int ppc_ext_id = -1; /* * GUC: pg_pathcheck.elevel * The elevel passed to ereport() when a corrupt Path is detected. * LOG writes to the server log only; WARNING (default) also notifies * the client and continues; ERROR aborts the statement; PANIC crashes * the backend so you get a core dump for post-mortem. */ static int ppc_elevel = WARNING; static const struct config_enum_entry ppc_elevel_options[] = { {"log", LOG, false}, {"warning", WARNING, false}, {"error", ERROR, false}, {"panic", PANIC, false}, {NULL, 0, false} }; /* * GUC: pg_pathcheck.stage_checks * When on, the per-stage hooks (set_rel_pathlist_hook, * set_join_pathlist_hook, and the pathlist check inside * create_upper_paths_hook) walk the affected rels' pathlists on every * firing. Off by default: those checks multiply planner work by the * number of base/join/upper stages and are only needed when narrowing * down a bug already flagged by the end-of-planning walker. * * The end-of-planning walker (planner_shutdown_hook) and the root * stashing inside create_upper_paths_hook remain active regardless of * this flag. */ static bool ppc_stage_checks = false; /* * Visited-pointer hash, rebuilt per top-level walk. Deduplication tool. * Prevents exponential blow-up when the same sub-path is reachable from * multiple parents (AppendPath children, cheapest_*_path aliases, etc.). */ static HTAB *visited = NULL; /* Forward declarations. */ static void ppc_create_upper_paths(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, RelOptInfo *output_rel, void *extra); static void ppc_planner_shutdown(PlannerGlobal *glob, Query *parse, const char *query_string, PlannedStmt *pstmt); static void ppc_set_join_pathlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); static void ppc_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void check_rel_pathlists(RelOptInfo *rel, const char *ctx, PlannerInfo *root); static void check_rel_pathlist(List *paths, const char *listname, RelOptInfo *rel, const char *ctx, PlannerInfo *root); static const char *upper_stage_name(UpperRelationKind stage); static void walk_planner_info(PlannerInfo *root); static void walk_rel(RelOptInfo *rel, PlannerInfo *root); static void walk_pathlist(List *paths, const char *listname, RelOptInfo *rel, PlannerInfo *root); static void walk_path(Path *path, const char *source, List *container, RelOptInfo *rel, PlannerInfo *root); static void verify_path_parent(Path *path, RelOptInfo *expected, const char *source, List *container, PlannerInfo *root); static bool mark_visited(void *ptr); static bool is_path_tag(NodeTag tag); static const char *tag_name(int tag); static const char *format_relnames(RelOptInfo *rel, PlannerInfo *root); static const char *format_pathlist(List *paths); /* * _PG_init * Chain onto the two planner hooks we need. */ void _PG_init(void) { DefineCustomEnumVariable(PPC_NAME ".elevel", "elevel used when a corrupt Path is detected.", "LOG writes to the server log only, WARNING " "also notifies the client and continues, ERROR " "aborts the statement, PANIC crashes for a " "core dump.", &ppc_elevel, WARNING, ppc_elevel_options, PGC_USERSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable(PPC_NAME ".stage_checks", "Run per-stage pathlist checks at base-rel, " "join-rel and upper-rel hook boundaries.", "Off by default. Turn on only to narrow down a " "finding already flagged at end of planning.", &ppc_stage_checks, false, PGC_USERSET, 0, NULL, NULL, NULL); MarkGUCPrefixReserved(PPC_NAME); prev_create_upper_paths_hook = create_upper_paths_hook; create_upper_paths_hook = ppc_create_upper_paths; prev_planner_shutdown_hook = planner_shutdown_hook; planner_shutdown_hook = ppc_planner_shutdown; prev_set_join_pathlist_hook = set_join_pathlist_hook; set_join_pathlist_hook = ppc_set_join_pathlist; prev_set_rel_pathlist_hook = set_rel_pathlist_hook; set_rel_pathlist_hook = ppc_set_rel_pathlist; } /* * ppc_set_join_pathlist * Early dangling-pointer check, fired immediately after a join's * pathlists have been populated. Walks every entry of the two * sibling rels' pathlist / partial_pathlist — i.e. the full outer * and inner input-rel pathlists, not just the children referenced * by the new join paths. Any entry with a non-Path NodeTag, or a * valid Path tag whose ->parent has drifted to another rel, is * reported at pg_pathcheck.elevel. * * This narrows the detection window from end-of-planning to * end-of-this-join. Set pg_pathcheck.elevel = 'error' to abort the * statement on the first finding and pin the guilty query. */ static void ppc_set_join_pathlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra) { if (prev_set_join_pathlist_hook) (*prev_set_join_pathlist_hook) (root, joinrel, outerrel, innerrel, jointype, extra); if (!ppc_stage_checks) return; Assert(joinrel != NULL && outerrel != NULL && innerrel != NULL); { char *outer_ctx = psprintf("outer side of join rel %s", format_relnames(joinrel, root)); char *inner_ctx = psprintf("inner side of join rel %s", format_relnames(joinrel, root)); check_rel_pathlists(outerrel, outer_ctx, root); check_rel_pathlists(innerrel, inner_ctx, root); } } /* * ppc_set_rel_pathlist * Fires at the end of set_rel_pathlist() for each base rel. Uses the * same pathlist/partial_pathlist validator as the join hook — every * entry must be a live Path whose ->parent is this rel. Detection at * this point localises a finding to a specific set_*_pathlist step. */ static void ppc_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { if (prev_set_rel_pathlist_hook) (*prev_set_rel_pathlist_hook) (root, rel, rti, rte); if (!ppc_stage_checks) return; Assert(rel != NULL); check_rel_pathlists(rel, "base rel", root); } /* * check_rel_pathlists * Check both pathlist and partial_pathlist of one rel. ctx is a short * free-form string appended to every finding to describe the call site * ("base rel", "outer side of join rel {a,b}", etc.) so a developer * can correlate findings with a specific planner step. */ static void check_rel_pathlists(RelOptInfo *rel, const char *ctx, PlannerInfo *root) { check_rel_pathlist(rel->pathlist, "pathlist", rel, ctx, root); check_rel_pathlist(rel->partial_pathlist, "partial_pathlist", rel, ctx, root); } /* * check_rel_pathlist * Validate every Path pointer in one list owned by rel. Two failure * modes: * (a) the entry's NodeTag is not a Path-family tag — the chunk has * been freed or overwritten (dangling pointer); * (b) the entry carries a valid Path tag but ->parent does not * match the owning rel — same-size-class aliasing, the slot * has been re-claimed by a Path of a different rel. */ static void check_rel_pathlist(List *paths, const char *listname, RelOptInfo *rel, const char *ctx, PlannerInfo *root) { ListCell *lc; foreach(lc, paths) { Path *p = (Path *) lfirst(lc); NodeTag tag; int i = foreach_current_index(lc); if (p == NULL) continue; tag = nodeTag(p); if (!is_path_tag(tag)) { ereport(ppc_elevel, errmsg(PPC_NAME ": dangling pointer at %s[%d], rel %s (%s)", listname, i, format_relnames(rel, root), ctx), errdetail("invalid NodeTag %s; %s contents: %s", tag_name((int) tag), listname, format_pathlist(paths)), errhint("query: %s", debug_query_string ? debug_query_string : "(null)")); continue; } /* * Upper rels legitimately hold paths whose ->parent is the input * rel (apply_scanjoin_target_to_paths and similar), so the * identity check only applies to base and join rels. */ if (!IS_UPPER_REL(rel) && p->parent != rel) { RelOptInfo *actual = p->parent; ereport(ppc_elevel, errmsg(PPC_NAME ": path parent mismatch at %s[%d], rel %s (%s)", listname, i, format_relnames(rel, root), ctx), errdetail("path %s claims rel %s", tag_name((int) tag), (actual != NULL && IsA(actual, RelOptInfo)) ? format_relnames(actual, root) : "(garbage)"), errhint("query: %s", debug_query_string ? debug_query_string : "(null)")); } } } /* * upper_stage_name * Symbolic name for an UpperRelationKind value, for diagnostics. */ static const char * upper_stage_name(UpperRelationKind stage) { switch (stage) { case UPPERREL_SETOP: return "UPPERREL_SETOP"; case UPPERREL_PARTIAL_GROUP_AGG: return "UPPERREL_PARTIAL_GROUP_AGG"; case UPPERREL_GROUP_AGG: return "UPPERREL_GROUP_AGG"; case UPPERREL_WINDOW: return "UPPERREL_WINDOW"; case UPPERREL_PARTIAL_DISTINCT: return "UPPERREL_PARTIAL_DISTINCT"; case UPPERREL_DISTINCT: return "UPPERREL_DISTINCT"; case UPPERREL_ORDERED: return "UPPERREL_ORDERED"; case UPPERREL_FINAL: return "UPPERREL_FINAL"; } return "UPPERREL_?"; } /* * ppc_create_upper_paths * Dual duty: * 1. Capture the top-level PlannerInfo at UPPERREL_FINAL, since the * planner-shutdown hook is not handed the top root directly. * 2. Check the input and output rels' pathlist / partial_pathlist at * every upper-rel stage. This narrows detection from "end of * planning" to "end of this upper-rel stage", so a finding at * (say) UPPERREL_ORDERED but not at UPPERREL_WINDOW pins the bug * to ordered-paths construction in grouping_planner. */ static void ppc_create_upper_paths(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, RelOptInfo *output_rel, void *extra) { if (prev_create_upper_paths_hook) (*prev_create_upper_paths_hook) (root, stage, input_rel, output_rel, extra); /* (1) stash top root so planner_shutdown_hook can find it */ if (root->parent_root == NULL && stage == UPPERREL_FINAL) { if (ppc_ext_id < 0) ppc_ext_id = GetPlannerExtensionId(PPC_NAME); SetPlannerGlobalExtensionState(root->glob, ppc_ext_id, root); } /* (2) per-stage pathlist check on input and output rels (opt-in) */ if (ppc_stage_checks) { const char *sname = upper_stage_name(stage); char *in_ctx = psprintf("create_upper_paths input, stage %s", sname); char *out_ctx = psprintf("create_upper_paths output, stage %s", sname); if (input_rel != NULL) check_rel_pathlists(input_rel, in_ctx, root); if (output_rel != NULL) check_rel_pathlists(output_rel, out_ctx, root); } } /* * ppc_planner_shutdown * Walk all Paths reachable from the top root and from PlannerGlobal. */ static void ppc_planner_shutdown(PlannerGlobal *glob, Query *parse, const char *query_string, PlannedStmt *pstmt) { PlannerInfo *top_root; if (ppc_ext_id >= 0 && (top_root = GetPlannerGlobalExtensionState(glob, ppc_ext_id)) != NULL) { HASHCTL ctl = {0}; ctl.keysize = sizeof(void *); ctl.entrysize = sizeof(void *); ctl.hcxt = CurrentMemoryContext; /* * Do not care about previous value of the pointer. It might stay * initialized in case of previous internal error. But memory already * freed because of transactional memory context. */ visited = hash_create(PPC_NAME " visited", 1024, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); walk_planner_info(top_root); /* Subplan PlannerInfos and their backing top-level Paths. */ foreach_node(PlannerInfo, root, glob->subroots) walk_planner_info(root); /* * Deliberately duplicated crawler - just to find potential * low-probability discrepancies or dangled pointers in this list itself */ walk_pathlist(glob->subpaths, "glob->subpaths", NULL, top_root); hash_destroy(visited); visited = NULL; } if (prev_planner_shutdown_hook) (*prev_planner_shutdown_hook) (glob, parse, query_string, pstmt); } /* * walk_planner_info * Visit every RelOptInfo on this root and recurse into its subroots. */ static void walk_planner_info(PlannerInfo *root) { int i; if (root == NULL || !mark_visited(root)) return; check_stack_depth(); /* Upper rels: one List per UpperRelationKind. */ for (i = 0; i <= UPPERREL_FINAL; i++) { foreach_node(RelOptInfo, rel, root->upper_rels[i]) walk_rel(rel, root); } /* Base rels and appendrel children. */ Assert(root->simple_rel_array_size == 0 || root->simple_rel_array != NULL); for (i = 1; i < root->simple_rel_array_size; i++) { RelOptInfo *rel = root->simple_rel_array[i]; if (rel == NULL) continue; walk_rel(rel, root); /* Recurse into subquery PlannerInfos. */ if (rel->subroot != NULL) walk_planner_info(rel->subroot); } /* Join rels collected during dynamic programming. */ foreach_node(RelOptInfo, rel, root->join_rel_list) walk_rel(rel, root); /* Non-recursive term of a recursive CTE, if any. */ walk_path(root->non_recursive_path, "non_recursive_path", NULL, NULL, root); } /* * walk_rel * Visit every Path slot on a RelOptInfo, and recurse into parallel * RelOptInfos that hang off it (unique_rel, grouped_rel, part_rels[]). * Upward links (parent / top_parent) are intentionally not followed: * those rels are reached via simple_rel_array or join_rel_list anyway. */ static void walk_rel(RelOptInfo *rel, PlannerInfo *root) { ListCell *lc; if (rel == NULL || !mark_visited(rel)) return; /* * Parent-match check for every Path directly attached to this rel. * If a slot has been reused by a Path built for another rel * (same-size-class aliasing that survives the NodeTag check), the * reused Path's ->parent points to its real owner, not to us. * * Skip upper rels: their pathlists legitimately carry paths whose * ->parent is the input rel (see apply_scanjoin_target_to_paths and * similar). The invariant only holds for base and join rels. */ if (!IS_UPPER_REL(rel)) { foreach(lc, rel->pathlist) verify_path_parent(lfirst(lc), rel, "pathlist", rel->pathlist, root); foreach(lc, rel->partial_pathlist) verify_path_parent(lfirst(lc), rel, "partial_pathlist", rel->partial_pathlist, root); foreach(lc, rel->cheapest_parameterized_paths) verify_path_parent(lfirst(lc), rel, "cheapest_parameterized_paths", rel->cheapest_parameterized_paths, root); verify_path_parent(rel->cheapest_startup_path, rel, "cheapest_startup_path", NULL, root); verify_path_parent(rel->cheapest_total_path, rel, "cheapest_total_path", NULL, root); } walk_pathlist(rel->pathlist, "pathlist", rel, root); walk_pathlist(rel->partial_pathlist, "partial_pathlist", rel, root); walk_pathlist(rel->cheapest_parameterized_paths, "cheapest_parameterized_paths", rel, root); walk_path(rel->cheapest_startup_path, "cheapest_startup_path", NULL, rel, root); walk_path(rel->cheapest_total_path, "cheapest_total_path", NULL, rel, root); /* * Recurse into special RelOptInfos in case their paths are washed out of * the main pathlist. */ if (rel->unique_rel != NULL) walk_rel(rel->unique_rel, root); if (rel->grouped_rel != NULL) walk_rel(rel->grouped_rel, root); /* Purely redundant. Just to be paranoid. */ if (rel->part_rels != NULL) { int i; for (i = 0; i < rel->nparts; i++) if (rel->part_rels[i] != NULL) walk_rel(rel->part_rels[i], root); } } /* * verify_path_parent * Confirm that a Path found directly on rel's own pathlist-family * fields actually claims rel as its parent. A mismatch catches the * aliasing case that escapes the NodeTag check: the memory chunk was * freed and re-allocated as a different Path (possibly belonging to * another rel) within the same planning session. * * Called only from walk_rel, never from the sub-path recursion — * inside compound Path nodes the parent legitimately varies (e.g., * JoinPath.outerjoinpath belongs to the child rel, not the join rel). */ static void verify_path_parent(Path *path, RelOptInfo *expected, const char *source, List *container, PlannerInfo *root) { RelOptInfo *actual; /* * Skip NULL and paths with an invalid NodeTag: walk_path already * reports those, and ->parent on a bogus chunk is not readable. */ if (path == NULL || !is_path_tag(nodeTag(path))) return; actual = path->parent; if (actual == expected) return; /* * ->parent doesn't match. As the memory is reused it might happen we see * a sort of garbage here, so validate the pointer before dereferencing it * via IS_UPPER_REL (which reads ->reloptkind). */ if (actual == NULL || !IsA(actual, RelOptInfo)) { ereport(ppc_elevel, errmsg(PPC_NAME ": path has non-RelOptInfo parent in %s, target rel %s", source, format_relnames(expected, root)), errhint("query: %s", debug_query_string ? debug_query_string : "(null)")); return; } /* * Upper rels legitimately carry paths whose ->parent is the input rel * (see apply_scanjoin_target_to_paths and friends). We filter these out * here rather than in walk_rel so that a garbage ->parent is still caught * by the IsA check above. */ if (IS_UPPER_REL(actual)) return; /* * Classic same-size-class alias: the slot was reused by another rel's * path. Name both rels by their contributing base relations. */ ereport(ppc_elevel, errmsg(PPC_NAME ": path parent mismatch in %s, target rel %s", source, format_relnames(expected, root)), container != NULL ? errdetail("path %s claims rel %s, path signature: rows: %.0lf, scost: %.2lf, tcost: %.2lf; %s contents: %s", tag_name(path->type), actual->relids != NULL ? nodeToString(actual->relids) : "UPPER_REL", path->rows, path->startup_cost, path->total_cost, source, format_pathlist(container)) : errdetail("path %s claims rel %s, path signature: rows: %.0lf, scost: %.2lf, tcost: %.2lf", tag_name(path->type), actual->relids != NULL ? nodeToString(actual->relids) : "UPPER_REL", path->rows, path->startup_cost, path->total_cost), errhint("query: %s", debug_query_string ? debug_query_string : "(null)")); } /* * walk_pathlist * Visit each Path in a List, tagging every element with the list's name. */ static void walk_pathlist(List *paths, const char *listname, RelOptInfo *rel, PlannerInfo *root) { ListCell *lc; foreach(lc, paths) walk_path((Path *) lfirst(lc), listname, paths, rel, root); } /* * walk_path * Validate a Path's NodeTag and descend into every embedded sub-Path. * * The NodeTag check is the main "is this memory still a Path" probe. * With CLOBBER_FREED_MEMORY the tag becomes 0x7F7F7F7F which fails * is_path_tag; with a real freed-and-reused chunk the odds of landing * on a valid Path tag are vanishingly low. * * source names the field or list that contains this Path (for diagnostics). * container, when non-NULL, is the List whose full contents are dumped * in the errdetail when corruption is detected. * * Layout safety net: every Path subtype dereferenced below is guarded * by a structural-hash entry in PPC_WALK_PATH_EXPECTED_HASHES further * down in this file. If you are here because a field name no longer * compiles, *do not* paper over it by renaming the access — read the * mirror block's header comment first: the hash-mismatch diagnostic * will tell you which struct moved and why you are being forced to * look. */ static void walk_path(Path *path, const char *source, List *container, RelOptInfo *rel, PlannerInfo *root) { NodeTag tag; if (path == NULL) return; check_stack_depth(); tag = nodeTag(path); if (!is_path_tag(tag)) { ereport(ppc_elevel, errmsg(PPC_NAME ": invalid NodeTag %s in %s, rel %s", tag_name((int) tag), source, format_relnames(rel, root)), container != NULL ? errdetail("%s contents: %s", source, format_pathlist(container)) : 0, errhint("query: %s", debug_query_string ? debug_query_string : "(null)")); return; } if (!mark_visited(path)) return; /* * Dive into path tree. It is necessary (most of the time redundant) step * that we need to pass because single operation, represented by specific * RelOptInfo might be implemented by a complex path tree. */ switch (tag) { case T_Path: case T_IndexPath: case T_TidPath: case T_TidRangePath: case T_GroupResultPath: /* No sub-Paths. */ break; case T_BitmapHeapPath: walk_path(((BitmapHeapPath *) path)->bitmapqual, "BitmapHeapPath.bitmapqual", NULL, rel, root); break; case T_BitmapAndPath: walk_pathlist(((BitmapAndPath *) path)->bitmapquals, "BitmapAndPath.bitmapquals", rel, root); break; case T_BitmapOrPath: walk_pathlist(((BitmapOrPath *) path)->bitmapquals, "BitmapOrPath.bitmapquals", rel, root); break; case T_SubqueryScanPath: walk_path(((SubqueryScanPath *) path)->subpath, "SubqueryScanPath.subpath", NULL, rel, root); break; case T_ForeignPath: walk_path(((ForeignPath *) path)->fdw_outerpath, "ForeignPath.fdw_outerpath", NULL, rel, root); break; case T_CustomPath: walk_pathlist(((CustomPath *) path)->custom_paths, "CustomPath.custom_paths", rel, root); break; case T_AppendPath: walk_pathlist(((AppendPath *) path)->subpaths, "AppendPath.subpaths", rel, root); break; case T_MergeAppendPath: walk_pathlist(((MergeAppendPath *) path)->subpaths, "MergeAppendPath.subpaths", rel, root); break; case T_MaterialPath: walk_path(((MaterialPath *) path)->subpath, "MaterialPath.subpath", NULL, rel, root); break; case T_MemoizePath: walk_path(((MemoizePath *) path)->subpath, "MemoizePath.subpath", NULL, rel, root); break; case T_GatherPath: walk_path(((GatherPath *) path)->subpath, "GatherPath.subpath", NULL, rel, root); break; case T_GatherMergePath: walk_path(((GatherMergePath *) path)->subpath, "GatherMergePath.subpath", NULL, rel, root); break; case T_NestPath: case T_MergePath: case T_HashPath: walk_path(((JoinPath *) path)->outerjoinpath, "JoinPath.outerjoinpath", NULL, rel, root); walk_path(((JoinPath *) path)->innerjoinpath, "JoinPath.innerjoinpath", NULL, rel, root); break; case T_ProjectionPath: walk_path(((ProjectionPath *) path)->subpath, "ProjectionPath.subpath", NULL, rel, root); break; case T_ProjectSetPath: walk_path(((ProjectSetPath *) path)->subpath, "ProjectSetPath.subpath", NULL, rel, root); break; case T_SortPath: walk_path(((SortPath *) path)->subpath, "SortPath.subpath", NULL, rel, root); break; case T_IncrementalSortPath: walk_path(((IncrementalSortPath *) path)->spath.subpath, "IncrementalSortPath.subpath", NULL, rel, root); break; case T_GroupPath: walk_path(((GroupPath *) path)->subpath, "GroupPath.subpath", NULL, rel, root); break; case T_UniquePath: walk_path(((UniquePath *) path)->subpath, "UniquePath.subpath", NULL, rel, root); break; case T_AggPath: walk_path(((AggPath *) path)->subpath, "AggPath.subpath", NULL, rel, root); break; case T_GroupingSetsPath: walk_path(((GroupingSetsPath *) path)->subpath, "GroupingSetsPath.subpath", NULL, rel, root); break; case T_MinMaxAggPath: { /* * MinMaxAggPath has no top-level Path *, but each * MinMaxAggInfo on mmaggregates carries an access path * (the index-scan-with-LIMIT-1 sub-plan that materialises * one MIN/MAX value) and the PlannerInfo subroot used to * plan it. Neither is reachable from glob->subroots * (preprocess_minmax_aggregates does not register subroots * there), so we walk them explicitly here. walk_planner_info * is idempotent via the visited HTAB, so we can call it * defensively without worrying about double-traversal. */ MinMaxAggPath *mmap = (MinMaxAggPath *) path; ListCell *lc; foreach(lc, mmap->mmaggregates) { MinMaxAggInfo *info = lfirst_node(MinMaxAggInfo, lc); walk_path(info->path, "MinMaxAggInfo.path", NULL, rel, root); if (info->subroot != NULL) walk_planner_info(info->subroot); } } break; case T_WindowAggPath: walk_path(((WindowAggPath *) path)->subpath, "WindowAggPath.subpath", NULL, rel, root); break; case T_SetOpPath: walk_path(((SetOpPath *) path)->leftpath, "SetOpPath.leftpath", NULL, rel, root); walk_path(((SetOpPath *) path)->rightpath, "SetOpPath.rightpath", NULL, rel, root); break; case T_RecursiveUnionPath: walk_path(((RecursiveUnionPath *) path)->leftpath, "RecursiveUnionPath.leftpath", NULL, rel, root); walk_path(((RecursiveUnionPath *) path)->rightpath, "RecursiveUnionPath.rightpath", NULL, rel, root); break; case T_LockRowsPath: walk_path(((LockRowsPath *) path)->subpath, "LockRowsPath.subpath", NULL, rel, root); break; case T_ModifyTablePath: walk_path(((ModifyTablePath *) path)->subpath, "ModifyTablePath.subpath", NULL, rel, root); break; case T_LimitPath: walk_path(((LimitPath *) path)->subpath, "LimitPath.subpath", NULL, rel, root); break; default: /* is_path_tag accepted it, so this can't happen. */ Assert(false); break; } } /* * Lookup table: NodeTag value → symbolic name. Generated at build time * from src/backend/nodes/nodetags.h by a sed rule in the Makefile. * Entries for unused slots are NULL. The array size auto-tracks the * highest designated-initializer index emitted by the generator, so new * upstream tags don't silently overflow. */ static const char * const nodetag_names[] = { #include "nodetag_names.h" }; /* * tag_name * Return the symbolic name for a NodeTag value, or "UNDEF(nnn)" when * the value falls outside the known range. The UNDEF string is * allocated in the current memory context on each call; the caller * typically consumes it immediately inside an ereport. */ static const char * tag_name(int tag) { if (tag >= 0 && tag < (int) lengthof(nodetag_names) && nodetag_names[tag] != NULL) return nodetag_names[tag]; return psprintf("UNDEF(%d)", tag); } /* * format_relnames * Build a human-readable string from the owning RelOptInfo's relids, * resolving each member through root->simple_rte_array[i]->eref. * Returns a palloc'd string like "{t1, t2}" or "(unknown)" when * the rel or root is not available. */ static const char * format_relnames(RelOptInfo *rel, PlannerInfo *root) { StringInfoData buf; int x; bool first = true; if (rel == NULL || rel->relids == NULL || root == NULL) return "(unknown)"; if (!IsA(rel->relids, Bitmapset)) return "(invalid relids)"; initStringInfo(&buf); appendStringInfoChar(&buf, '{'); x = -1; while ((x = bms_next_member(rel->relids, x)) >= 0) { RangeTblEntry *rte; if (!first) appendStringInfoString(&buf, ", "); first = false; if (x < root->simple_rel_array_size && root->simple_rte_array != NULL && (rte = root->simple_rte_array[x]) != NULL && rte->eref != NULL) { appendStringInfoString(&buf, rte->eref->aliasname); } else { appendStringInfo(&buf, "rel#%d", x); } } appendStringInfoChar(&buf, '}'); return buf.data; } /* * format_pathlist * Dump the contents of a List of Path pointers: address and NodeTag * for each element. Used in errdetail when corruption is detected. */ static const char * format_pathlist(List *paths) { StringInfoData buf; ListCell *lc; int i = 0; if (paths == NIL) return "(empty)"; initStringInfo(&buf); foreach(lc, paths) { Path *p = (Path *) lfirst(lc); if (i > 0) appendStringInfoString(&buf, "; "); if (p == NULL) { appendStringInfo(&buf, "[%d] NULL", i); } else { NodeTag t = nodeTag(p); if (is_path_tag(t)) appendStringInfo(&buf, "[%d] %s", i, tag_name((int) t)); else appendStringInfo(&buf, "[%d] %s INVALID", i, tag_name((int) t)); } i++; } return buf.data; } /* * mark_visited * Insert ptr into the visited set; true on first visit, false otherwise. */ static bool mark_visited(void *ptr) { bool found; Assert(visited != NULL); Assert(ptr != NULL); (void) hash_search(visited, &ptr, HASH_ENTER, &found); return !found; } /* * PPC_WALK_PATH_EXPECTED_HASHES * Single hand-maintained source of truth for the set of Path subtypes * walk_path() knows how to descend into, together with each subtype's * expected structural hash. Drives two compile-time checks: * * - The count of entries here must equal the count in PATH_TAG_LIST * (generated from pathnodes.h). Catches any addition or removal * of a Path subtype in core: the build fails with a message * pointing here. * * - Each entry's expected hash must equal PPC_PATH_HASH_T_ * from pathtags_generated.h. Catches any layout edit inside an * existing Path struct (field add/remove/rename/retype/reorder), * naming the specific subtype that drifted. * * When the build breaks: * * 1. If the count mismatches: a subtype was added or removed in * pathnodes.h. Teach walk_path() how to descend into the new * subtype (or let it fall through if it has no sub-Paths), then * add or remove the corresponding entry below. * * 2. If a per-tag hash mismatches: the named subtype's body changed. * Diff pathnodes.h against the version the hash was blessed * against, audit walk_path()'s case for that subtype, and run * `make bless-path-hashes` to refresh this list. (The make * target is a convenience; the audit of walk_path() is not * automated and is mandatory.) * * All concrete subtypes are listed — even the ones walk_path() treats * as leaves (T_Path, T_IndexPath, T_TidPath, T_TidRangePath, * T_GroupResultPath) — because "no sub-Paths to descend" is itself a * layout claim that can rot if core grows a new Path * field in one * of them. * * Limitation: each hash covers only its subtype's own body, not the * bodies of embedded parent structs. walk_path() reaches into * embedded parents at exactly one point -- IncrementalSortPath's * spath.subpath -- and that access is protected by the SortPath * entry below. If future walker code dereferences a non-Path struct * embedded in a Path (e.g. ((Foo *) path)->non_path.field), layout * changes in that non-Path struct will not trip this guard and must * be handled separately. * * See contrib/pg_pathcheck/README.md section "Bumping PostgreSQL" * for the end-to-end workflow. */ #define PPC_WALK_PATH_EXPECTED_HASHES(X) \ X(T_Path, 0x09ee69a9e5a8f23bULL) \ X(T_IndexPath, 0xd9eab6b3c997d515ULL) \ X(T_BitmapHeapPath, 0x75a7b181f72c352dULL) \ X(T_BitmapAndPath, 0xccc395f9829cc5eeULL) \ X(T_BitmapOrPath, 0xccc395f9829cc5eeULL) \ X(T_TidPath, 0x689cb40f04282c9cULL) \ X(T_TidRangePath, 0x49cc3bbec71064f4ULL) \ X(T_SubqueryScanPath, 0x90f989f41b57b041ULL) \ X(T_ForeignPath, 0xf6d2f824716c5cd8ULL) \ X(T_CustomPath, 0x349311c5592f0b5bULL) \ X(T_AppendPath, 0xb56a41497d3eb372ULL) \ X(T_MergeAppendPath, 0xfce0f29acc68fbbcULL) \ X(T_GroupResultPath, 0x84701fcd7617cc00ULL) \ X(T_MaterialPath, 0x90f989f41b57b041ULL) \ X(T_MemoizePath, 0x3dd2cdd46b1c4006ULL) \ X(T_GatherPath, 0x50237ef02554d33dULL) \ X(T_GatherMergePath, 0x5468656d4d359ad1ULL) \ X(T_NestPath, 0xc9d7126d080e587eULL) \ X(T_MergePath, 0x18d4e85c337a4499ULL) \ X(T_HashPath, 0x7fabf0c425c0856cULL) \ X(T_ProjectionPath, 0xe9172dd4d292d671ULL) \ X(T_ProjectSetPath, 0x90f989f41b57b041ULL) \ X(T_SortPath, 0x90f989f41b57b041ULL) \ X(T_IncrementalSortPath, 0x4c69aafa5f126723ULL) \ X(T_GroupPath, 0xd3ffe008ac1ac2fcULL) \ X(T_UniquePath, 0x5654a3410d707ef9ULL) \ X(T_AggPath, 0x179cb625db854a97ULL) \ X(T_GroupingSetsPath, 0x62c65256f41bdcb6ULL) \ X(T_MinMaxAggPath, 0xda76e318ee433a2fULL) \ X(T_WindowAggPath, 0xccabff582235b106ULL) \ X(T_SetOpPath, 0xd849dc901753a051ULL) \ X(T_RecursiveUnionPath, 0x46f3b9fc9f2321f6ULL) \ X(T_LockRowsPath, 0x094815119f154b2dULL) \ X(T_ModifyTablePath, 0x1ea4932e8ac9b889ULL) \ X(T_LimitPath, 0x4fd0995222ca414eULL) /* * Count-parity check: the number of subtypes we expect walk_path() to * handle must equal the number actually present in PATH_TAG_LIST. This * is what catches additions and removals in core; the per-tag hash * asserts below catch layout changes on existing subtypes. */ #define PPC_COUNT_ONE_ARG(t) + 1 #define PPC_COUNT_TWO_ARG(t, expected) + 1 StaticAssertDecl((0 PATH_TAG_LIST(PPC_COUNT_ONE_ARG)) == (0 PPC_WALK_PATH_EXPECTED_HASHES(PPC_COUNT_TWO_ARG)), "pg_pathcheck: number of Path subtypes in pathnodes.h " "no longer matches the set handled by walk_path(); add " "or remove entries in PPC_WALK_PATH_EXPECTED_HASHES " "and teach walk_path() about the change."); #undef PPC_COUNT_ONE_ARG #undef PPC_COUNT_TWO_ARG /* * Per-subtype hash check: expand each entry into a StaticAssertDecl that * compares the blessed hash against the one gen_pathtags.pl just computed. * Token-paste PPC_PATH_HASH_ onto the tag to reference the generated * constant; stringify the tag so the diagnostic names the exact struct. */ #define PPC_HASH_ASSERT(tag, expected) \ StaticAssertDecl((expected) == PPC_PATH_HASH_##tag, \ "pg_pathcheck: struct layout for " #tag " changed in " \ "pathnodes.h; audit walk_path() and run " \ "`make bless-path-hashes` to refresh " \ "PPC_WALK_PATH_EXPECTED_HASHES."); PPC_WALK_PATH_EXPECTED_HASHES(PPC_HASH_ASSERT) #undef PPC_HASH_ASSERT /* * is_path_tag * True if tag names a Path descendant. The case list is expanded from * PATH_TAG_LIST, which gen_pathtags.pl derives from pathnodes.h at * build time — so the whitelist cannot go stale. Keeping it as a * switch (rather than a range test) preserves the freed-memory-tag * safety property: a random integer that happens to fall into the * numeric range of Path tags but was never minted by core still * falls through to the default arm. */ static bool is_path_tag(NodeTag tag) { switch (tag) { #define PPC_CASE(t) case t: PATH_TAG_LIST(PPC_CASE) #undef PPC_CASE return true; default: return false; } }