-- pgmnemo--0.9.0--0.9.1.sql
-- Incremental upgrade: pgmnemo 0.9.0 → 0.9.1
-- SPDX-License-Identifier: Apache-2.0
--
-- THEME: Fix navigate_expand + navigate_locate graph traversal (P0)
--
-- ROOT CAUSE: Both navigate_* functions filter by edge_kind IN ('causal','temporal'),
--   but production edges written via backfill_mem_edge.py all have edge_kind='semantic'
--   due to unmapped relation_type 'CO_TEMPORAL' (the 'triple-dead' mismatch).
--   relation_type is the REAL discriminator and must be the filter predicate.
--
-- ITEMS:
--   #1  navigate_expand(): filter by relation_type (not edge_kind); add relation_types
--       parameter; bidirectional BFS; handle valid_until='infinity'; threshold 0.7→0.5
--   #2  navigate_locate(): graph_walk filters by relation_type (not edge_kind);
--       bidirectional BFS; handle valid_until='infinity'
--   P1-D  navigate_locate raw_candidates: replace inline to_tsvector(topic) with stored
--         topic_tsv column (O(n) seq-scan → GIN-indexed stored generated column)
--
-- DELIBERATELY NOT REMOVED in 0.9.1 (joint Agency↔pgmnemo verdict 2026-06-13,
-- AGENCY_DOGFOOD_FEEDBACK_AND_CONSULT_2026-06-13.md §4 #3/#6):
--   traverse_causal_chain, traverse_temporal_window, recall_lessons_pooled have
--   zero current callers BUT are the only WORKING traversal SPs. Their 0% adoption
--   is "no nav loop is proven yet", NOT redundancy. Retire only AFTER the navigate_*
--   loop is validated by the nav-efficiency benchmark. Marked remove-candidate v1.0
--   below (COMMENT only — no DROP). recall_lessons retirement likewise deferred (§6).
--
-- Upgrade: ALTER EXTENSION pgmnemo UPDATE TO '0.9.1';

\echo Use "ALTER EXTENSION pgmnemo UPDATE TO '0.9.1'" to load this file. \quit

-- ══════════════════════════════════════════════════════════════════════════════
-- REMOVE-CANDIDATES v1.0 (NOT dropped in 0.9.1 — joint verdict §4 #3/#6)
-- The following SPs have zero current callers but are the only working traversal /
-- pooled-recall SPs today. Per the Agency↔pgmnemo consult they are retired ONLY
-- after the navigate_* loop is validated by the nav-efficiency benchmark — not now:
--   • pgmnemo.traverse_causal_chain(BIGINT, INT, TEXT[], BOOLEAN, TEXT)   [v0.2.1]
--   • pgmnemo.traverse_temporal_window(BIGINT, INTERVAL, BOOLEAN, TEXT, INT, INT) [v0.2.0]
--   • pgmnemo.recall_lessons_pooled(vector, INT, INT)                     [v0.1.2]
-- No DROP statements here by design.
-- ══════════════════════════════════════════════════════════════════════════════

-- ══════════════════════════════════════════════════════════════════════════════
-- #1: navigate_expand — fix graph traversal (P0)
-- ══════════════════════════════════════════════════════════════════════════════
--
-- Bugs fixed (all verified in pgmnemo--0.9.0.sql lines 4396-4405):
--
--   B1: edge_kind IN ('causal','temporal') filtered out ALL production edges because
--       backfill_mem_edge.py wrote edges with relation_type='CO_TEMPORAL' (unmapped in
--       add_edge's CASE → defaults to edge_kind='semantic'). Fix: filter by relation_type
--       which is the actual typed discriminator. New param: relation_types TEXT[] DEFAULT
--       NULL (NULL = traverse all active edges regardless of type).
--
--   B2: valid_until IS NULL excluded edges that might carry valid_until='infinity'
--       (following agent_lesson's t_valid_to convention instead of mem_edge's NULL
--       convention). Fix: (valid_until IS NULL OR valid_until = 'infinity'::TIMESTAMPTZ).
--
--   B3: Forward-only BFS (me.source_id = ge.node_id → me.target_id) means the agent
--       cannot discover backward relations. If the agent is at lesson B and there exists
--       "A CAUSED_BY B" (source=A, target=B), forward-only BFS from B finds nothing.
--       Fix: bidirectional — join on (source_id = node OR target_id = node), traverse to
--       the opposite endpoint.
--
--   B4: Default threshold 0.7 → 0.5. For navigation (not scoring), the agent should
--       see more connections and decide which to follow. 0.7 was unnecessarily aggressive
--       for sparse graphs.
--
-- Signature change: adds 5th param (TEXT[]) — DROP old 4-arg overload required.
-- ──────────────────────────────────────────────────────────────────────────────

-- Drop old 4-arg signature to prevent ambiguous overload resolution
DROP FUNCTION IF EXISTS pgmnemo.navigate_expand(BIGINT[], TEXT[], INT, FLOAT);

CREATE OR REPLACE FUNCTION pgmnemo.navigate_expand(
    ids                    BIGINT[],
    expand_fields          TEXT[]           DEFAULT '{}',
    graph_expand_depth     INT              DEFAULT 1,
    graph_expand_threshold FLOAT            DEFAULT 0.5,
    relation_types         TEXT[]           DEFAULT NULL
)
RETURNS TABLE (
    id                      BIGINT,
    content                 TEXT,
    expand_detail           JSONB,
    graph_neighbor_ids      BIGINT[],
    graph_neighbor_previews TEXT[],
    tokens_consumed         INT,
    navigation_path         TEXT
)
LANGUAGE plpgsql
STABLE
PARALLEL SAFE
AS $$
BEGIN
    IF ids IS NULL OR array_length(ids, 1) IS NULL THEN
        RETURN;
    END IF;

    RETURN QUERY
    WITH RECURSIVE
    -- Step 1: seed rows — the requested IDs
    seed_rows AS (
        SELECT
            al.id,
            al.lesson_text,
            al.metadata,
            CASE
                WHEN expand_fields IS NOT NULL AND array_length(expand_fields, 1) > 0
                THEN (
                    SELECT jsonb_object_agg(f, al.metadata->f)
                    FROM unnest(expand_fields) AS f
                    WHERE al.metadata ? f
                )
                ELSE NULL::JSONB
            END                                                            AS expand_detail,
            'content'::TEXT                                                AS navigation_path,
            0                                                              AS depth,
            ARRAY[al.id]                                                   AS path
        FROM pgmnemo.agent_lesson al
        WHERE al.id = ANY(ids)
          AND al.is_active
          AND al.t_valid_to = 'infinity'::TIMESTAMPTZ
    ),
    -- Step 2: BFS graph expansion — BIDIRECTIONAL, relation_type-gated, weight-gated
    graph_expand (node_id, lesson_text, metadata, depth, path) AS (
        -- Seed: input IDs
        SELECT
            sr.id,
            sr.lesson_text,
            sr.metadata,
            0,
            sr.path
        FROM seed_rows sr

        UNION ALL

        -- Recursive: traverse edges in BOTH directions, gated by relation_type + weight
        SELECT
            al.id,
            al.lesson_text,
            al.metadata,
            ge.depth + 1,
            ge.path || al.id
        FROM graph_expand ge
        JOIN pgmnemo.mem_edge me ON (
            me.source_id = ge.node_id OR me.target_id = ge.node_id
        )
        JOIN pgmnemo.agent_lesson al ON al.id = CASE
            WHEN me.source_id = ge.node_id THEN me.target_id
            ELSE me.source_id
        END
        WHERE graph_expand_depth >= 1
          AND ge.depth < graph_expand_depth
          -- B1 fix: filter by relation_type (the real discriminator)
          -- NULL relation_types = traverse ALL active edges (no type filter)
          AND (relation_types IS NULL OR me.relation_type = ANY(relation_types))
          -- Weight gate: configurable threshold (default 0.5)
          AND me.weight >= graph_expand_threshold::REAL
          -- B2 fix: handle both active-edge sentinel conventions
          AND (me.valid_until IS NULL OR me.valid_until = 'infinity'::TIMESTAMPTZ)
          -- Target lesson must be active
          AND al.is_active
          AND al.t_valid_to = 'infinity'::TIMESTAMPTZ
          -- Cycle guard: prevent revisiting nodes already in path
          AND NOT (al.id = ANY(ge.path))
    ),
    -- Step 3: collect expanded rows (exclude IDs already in seed)
    expanded_rows AS (
        SELECT DISTINCT ON (ge.node_id)
            ge.node_id                                                         AS id,
            ge.lesson_text,
            ge.metadata,
            CASE
                WHEN expand_fields IS NOT NULL AND array_length(expand_fields, 1) > 0
                THEN (
                    SELECT jsonb_object_agg(f, ge.metadata->f)
                    FROM unnest(expand_fields) AS f
                    WHERE ge.metadata ? f
                )
                ELSE NULL::JSONB
            END                                                                AS expand_detail,
            'graph_expand'::TEXT                                               AS navigation_path
        FROM graph_expand ge
        WHERE ge.depth > 0                         -- only expansion rows, not seeds
          AND NOT (ge.node_id = ANY(ids))          -- exclude original input IDs
        ORDER BY ge.node_id, ge.depth ASC          -- prefer shallower depth
    ),
    -- Step 4b: deduplicated BFS neighbors per seed (all depths)
    distinct_neighbors AS (
        SELECT DISTINCT ON (ge.path[1], ge.node_id)
            ge.path[1]                                     AS seed_id,
            ge.node_id,
            ge.depth,
            left(ge.lesson_text, 50)                       AS neighbor_preview
        FROM graph_expand ge
        WHERE ge.depth > 0
          AND NOT (ge.node_id = ANY(ids))
        ORDER BY ge.path[1], ge.node_id, ge.depth ASC
    ),
    -- Step 4c: aggregate neighbors per seed — positional correspondence guaranteed
    neighbor_summary AS (
        SELECT
            dn.seed_id,
            array_agg(dn.node_id ORDER BY dn.depth, dn.node_id)          AS neighbor_ids,
            array_agg(dn.neighbor_preview ORDER BY dn.depth, dn.node_id) AS neighbor_previews
        FROM distinct_neighbors dn
        GROUP BY dn.seed_id
    ),
    -- Step 5: union seed + expanded, seed takes priority on id collision
    combined AS (
        SELECT sr.id,
               sr.lesson_text                                  AS content,
               sr.expand_detail,
               ns.neighbor_ids                                 AS graph_neighbor_ids,
               ns.neighbor_previews                            AS graph_neighbor_previews,
               sr.navigation_path
        FROM seed_rows sr
        LEFT JOIN neighbor_summary ns ON ns.seed_id = sr.id

        UNION ALL

        SELECT er.id,
               er.lesson_text                                  AS content,
               er.expand_detail,
               NULL::BIGINT[]                                  AS graph_neighbor_ids,
               NULL::TEXT[]                                     AS graph_neighbor_previews,
               er.navigation_path
        FROM expanded_rows er
    )
    -- Step 6: attach cumulative tokens_consumed (chars as proxy)
    SELECT
        c.id,
        c.content,
        c.expand_detail,
        c.graph_neighbor_ids,
        c.graph_neighbor_previews,
        SUM(length(c.content)) OVER (
            ORDER BY c.id ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        )::INT                                                 AS tokens_consumed,
        c.navigation_path
    FROM combined c
    ORDER BY c.id ASC;
END;
$$;

COMMENT ON FUNCTION pgmnemo.navigate_expand(BIGINT[], TEXT[], INT, FLOAT, TEXT[]) IS
    'Token-economy navigation EXPAND (v0.9.1). '
    'Returns full lesson_text + JSONB expansion for caller-chosen IDs. '
    'v0.9.1 fixes: '
    '(B1) relation_type filter replaces broken edge_kind filter — production edges had '
    'edge_kind=semantic for ALL types due to unmapped relation_types in backfill; '
    '(B2) valid_until handles both NULL and infinity sentinel conventions; '
    '(B3) bidirectional BFS — agent can discover backward relations (e.g. find cause '
    'from an effect by traversing CAUSED_BY edges in reverse); '
    '(B4) relation_types TEXT[] param — NULL traverses all, or pass specific types. '
    'Threshold default lowered: 0.5 (was 0.7) — navigation should be permissive; '
    'agent decides which connections to follow. '
    'Combine with navigate_locate() for the locate→connections→expand loop.';


-- ══════════════════════════════════════════════════════════════════════════════
-- #2: navigate_locate — fix graph_walk traversal
-- ══════════════════════════════════════════════════════════════════════════════
--
-- Same root cause as #1: graph_walk at line 4207 in 0.9.0 uses
--   me.edge_kind IN ('causal', 'temporal')
-- but should traverse by relation_type like recall_hybrid (line 1249).
--
-- Additional fixes:
--   - Bidirectional BFS for symmetric proximity scoring
--   - valid_until sentinel handling
--   - No edge_kind filter: for proximity scoring, ALL connected lessons contribute
--
-- No signature change — CREATE OR REPLACE is safe.
-- ──────────────────────────────────────────────────────────────────────────────

CREATE OR REPLACE FUNCTION pgmnemo.navigate_locate(
    query_embedding   vector(1024),
    query_text        TEXT,
    token_budget_chars INT              DEFAULT 2000,
    jsonb_filter      JSONB             DEFAULT NULL,
    project_id_filter INT               DEFAULT NULL
)
RETURNS TABLE (
    id              BIGINT,
    preview         TEXT,
    score           FLOAT8,
    tokens_consumed INT,
    navigation_path TEXT
)
LANGUAGE plpgsql
STABLE
PARALLEL SAFE
AS $$
#variable_conflict use_column
DECLARE
    _ef_search          INT;
    _include_unverified BOOLEAN;
    _tsquery            TSQUERY;
    _has_text           BOOLEAN;
    _has_vec            BOOLEAN;
    _graph_weight       DOUBLE PRECISION;
    _max_depth          CONSTANT INT := 2;  -- proximity scoring: 2-hop sufficient
    _rrf_k_f            DOUBLE PRECISION;
    _aux_scale          CONSTANT DOUBLE PRECISION := (0.8 / 61.0) / 0.76;
    _as_of_ts           TIMESTAMPTZ;
    _vec_weight         CONSTANT DOUBLE PRECISION := 0.4;
    _bm25_weight        CONSTANT DOUBLE PRECISION := 0.4;
    _raw_blend_weight   DOUBLE PRECISION;  -- v0.8.1 F2: cardinal raw score blend
BEGIN
    -- Validate: need at least one retrieval signal
    _has_vec  := query_embedding IS NOT NULL;
    _has_text := query_text IS NOT NULL AND length(trim(query_text)) > 0;
    IF NOT _has_vec AND NOT _has_text THEN
        RAISE EXCEPTION 'pgmnemo.navigate_locate: both query_embedding and query_text are NULL/empty';
    END IF;

    _rrf_k_f := 60.0;
    _raw_blend_weight := 1.0 / (_rrf_k_f + 1.0);  -- same order as max RRF per signal

    -- ef_search GUC
    BEGIN
        _ef_search := COALESCE(
            NULLIF(current_setting('pgmnemo.ef_search', TRUE), '')::INT, 100
        );
        IF _ef_search BETWEEN 10 AND 500 THEN
            EXECUTE format('SET LOCAL pgvector.hnsw.ef_search = %s', _ef_search);
        END IF;
    EXCEPTION WHEN OTHERS THEN NULL;
    END;

    -- include_unverified GUC
    BEGIN
        _include_unverified := COALESCE(
            current_setting('pgmnemo.include_unverified', TRUE)::BOOLEAN, FALSE
        );
    EXCEPTION WHEN OTHERS THEN
        _include_unverified := FALSE;
    END;

    -- as_of_ts GUC (bitemporal point-in-time filter)
    BEGIN
        _as_of_ts := NULLIF(current_setting('pgmnemo.as_of_timestamp', TRUE), '')::TIMESTAMPTZ;
    EXCEPTION WHEN OTHERS THEN
        _as_of_ts := NULL;
    END;

    -- graph_proximity_weight GUC
    BEGIN
        _graph_weight := COALESCE(
            NULLIF(current_setting('pgmnemo.graph_proximity_weight', TRUE), '')::DOUBLE PRECISION,
            0.2
        );
    EXCEPTION WHEN OTHERS THEN
        _graph_weight := 0.2;
    END;
    _graph_weight := GREATEST(0.0, LEAST(0.5, _graph_weight));

    -- Parse query_text → tsquery
    IF _has_text THEN
        BEGIN
            _tsquery := websearch_to_tsquery('english', query_text);
        EXCEPTION WHEN OTHERS THEN
            BEGIN
                _tsquery := plainto_tsquery('english', query_text);
            EXCEPTION WHEN OTHERS THEN
                _has_text := FALSE;
            END;
        END;
    END IF;

    RETURN QUERY
    WITH RECURSIVE
    -- Step 1: union candidates; JSONB predicate pushdown when jsonb_filter non-null
    raw_candidates AS (
        SELECT
            al.id,
            al.importance,
            al.created_at,
            al.commit_sha,
            al.verified_at,
            LEAST(length(al.lesson_text), 50)                                        AS text_len,
            -- vector score
            CASE
                WHEN _has_vec AND al.embedding IS NOT NULL
                THEN (1.0 - (al.embedding <=> query_embedding))::DOUBLE PRECISION
                ELSE 0.0::DOUBLE PRECISION
            END AS raw_vec_score,
            -- P1-D fix: use stored generated columns (topic_tsv, lesson_tsv) — GIN-indexed.
            -- Was: inline to_tsvector('english', COALESCE(topic,'')) — O(n) seq-scan per row.
            -- topic_tsv GENERATED ALWAYS AS (to_tsvector('english', coalesce(topic,''))) STORED
            -- pgmnemo_agent_lesson_topic_tsv_idx GIN index on topic_tsv.
            CASE
                WHEN _has_text AND (al.lesson_tsv @@ _tsquery
                     OR al.topic_tsv @@ _tsquery)
                THEN ts_rank_cd(
                    setweight(al.topic_tsv, 'A') || al.lesson_tsv,
                    _tsquery, 32)::DOUBLE PRECISION
                ELSE 0.0::DOUBLE PRECISION
            END AS raw_bm25_score
        FROM pgmnemo.agent_lesson al
        WHERE al.is_active
          AND (_include_unverified OR al.verified_at IS NOT NULL)
          -- JSONB predicate pushdown — uses GIN index on metadata when non-null
          AND (jsonb_filter IS NULL OR al.metadata @> jsonb_filter)
          -- project_id_filter — uses B-tree index pgmnemo_agent_lesson_project_idx
          AND (project_id_filter IS NULL OR al.project_id = project_id_filter)
          -- bitemporal: active rows only (unless as_of_ts set)
          AND (_as_of_ts IS NULL
               OR (al.t_valid_from <= _as_of_ts AND al.t_valid_to > _as_of_ts))
          AND (_as_of_ts IS NOT NULL OR al.t_valid_to = 'infinity'::TIMESTAMPTZ)
          -- union: matched by vector OR BM25 (including topic match)
          AND (
              (_has_vec  AND al.embedding  IS NOT NULL)
           OR (_has_text AND (al.lesson_tsv @@ _tsquery
               OR al.topic_tsv @@ _tsquery))
          )
    ),
    -- Step 2: sparse-safe RRF ranks (Cormack 2009)
    rrf_ranked AS (
        SELECT *,
            COUNT(*) OVER ()                                                          AS n_candidates,
            ROW_NUMBER() OVER (ORDER BY raw_vec_score DESC NULLS LAST)               AS vec_rank,
            CASE WHEN raw_bm25_score > 0
                 THEN RANK() OVER (PARTITION BY (raw_bm25_score > 0)
                                   ORDER BY raw_bm25_score DESC NULLS LAST)
                 ELSE NULL
            END                                                                       AS bm25_rank_sparse
        FROM raw_candidates
    ),
    -- Step 3: RRF score + aux tie-breaker (identical formula to recall_hybrid v0.6.2)
    scored AS (
        SELECT
            r.id,
            r.importance,
            r.created_at,
            r.commit_sha,
            r.verified_at,
            r.text_len,
            r.vec_rank,
            COALESCE(r.bm25_rank_sparse, r.n_candidates + 1) AS bm25_rank_eff,
            r.raw_vec_score,
            r.raw_bm25_score,
            -- v0.8.1 F2: ordinal RRF + cardinal raw score blend
            (_vec_weight  / (_rrf_k_f + r.vec_rank::DOUBLE PRECISION)
           + _bm25_weight / (_rrf_k_f + COALESCE(r.bm25_rank_sparse,
                                                   r.n_candidates + 1)::DOUBLE PRECISION)
           + _raw_blend_weight * (
                 _vec_weight  * r.raw_vec_score
               + _bm25_weight * r.raw_bm25_score))
                AS rrf_sparse
        FROM rrf_ranked r
    ),
    -- Step 4: top-5 anchors for graph BFS
    anchors AS (
        SELECT id FROM scored ORDER BY rrf_sparse DESC LIMIT 5
    ),
    -- Step 5: BFS on active edges from top-5 anchors — BIDIRECTIONAL
    -- v0.9.1 fix: traverses ALL relation_types (was edge_kind IN causal/temporal)
    -- Bidirectional: discovers both forward and backward graph neighbors
    graph_walk (anchor_id, depth, reached_id) AS (
        SELECT id, 0, id FROM anchors
        UNION ALL
        SELECT gw.anchor_id, gw.depth + 1,
               CASE WHEN me.source_id = gw.reached_id
                    THEN me.target_id
                    ELSE me.source_id
               END
        FROM graph_walk gw
        JOIN pgmnemo.mem_edge me ON (
            me.source_id = gw.reached_id OR me.target_id = gw.reached_id
        )
        WHERE gw.depth < _max_depth
          -- v0.9.1 fix: handle both active-edge sentinels
          AND (me.valid_until IS NULL OR me.valid_until = 'infinity'::TIMESTAMPTZ)
    ),
    -- Step 6: proximity score from BFS depth
    graph_proximity AS (
        SELECT
            gw.reached_id AS lesson_id,
            MAX(1.0 - gw.depth::DOUBLE PRECISION / _max_depth::DOUBLE PRECISION) AS proximity
        FROM graph_walk gw
        WHERE gw.depth > 0
        GROUP BY gw.reached_id
    ),
    -- Step 7: final score = (rrf_sparse + aux) * (1 + graph); safety cap at 200 rows
    -- v0.8.1 F1: graph is multiplicative re-rank (tie-breaker, not driver)
    final_ranked AS (
        SELECT
            s.id,
            s.text_len,
            s.vec_rank,
            s.bm25_rank_eff,
            (
                s.rrf_sparse
              + _aux_scale * (
                    0.05 * (s.importance::DOUBLE PRECISION / 5.0)
                  + 0.05 * GREATEST(0.0,
                               1.0 - LEAST(
                                   EXTRACT(EPOCH FROM (NOW() - s.created_at)) / (90.0 * 86400.0),
                                   1.0
                               )
                           )::DOUBLE PRECISION
                  + 0.05 * (CASE
                              WHEN s.commit_sha IS NOT NULL AND s.verified_at IS NOT NULL THEN 1.0
                              WHEN s.commit_sha IS NOT NULL                               THEN 0.4
                              ELSE                                                             0.0
                            END)::DOUBLE PRECISION
                )
            ) * (1.0 + _graph_weight * COALESCE(gp.proximity, 0.0))
              AS final_score
        FROM scored s
        LEFT JOIN graph_proximity gp ON gp.lesson_id = s.id
        ORDER BY (
                s.rrf_sparse
              + _aux_scale * (
                    0.05 * (s.importance::DOUBLE PRECISION / 5.0)
                  + 0.05 * GREATEST(0.0,
                               1.0 - LEAST(
                                   EXTRACT(EPOCH FROM (NOW() - s.created_at)) / (90.0 * 86400.0),
                                   1.0
                               )
                           )::DOUBLE PRECISION
                  + 0.05 * (CASE
                              WHEN s.commit_sha IS NOT NULL AND s.verified_at IS NOT NULL THEN 1.0
                              WHEN s.commit_sha IS NOT NULL                               THEN 0.4
                              ELSE                                                             0.0
                            END)::DOUBLE PRECISION
                )
            ) * (1.0 + _graph_weight * COALESCE(gp.proximity, 0.0))
            DESC
        LIMIT 200  -- safety cap: prevents unbounded result sets for large corpora
    ),
    -- Step 8: budget window — cumulative char sum over score-descending order
    budget_window AS (
        SELECT
            fr.id,
            fr.final_score,
            fr.text_len,
            fr.vec_rank,
            fr.bm25_rank_eff,
            SUM(fr.text_len) OVER (
                ORDER BY fr.final_score DESC, fr.id ASC
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
            ) AS cum_chars,
            ROW_NUMBER() OVER (ORDER BY fr.final_score DESC, fr.id ASC) AS rn
        FROM final_ranked fr
    )
    SELECT
        bw.id                                                                         AS id,
        left(al.lesson_text, 50)                                                      AS preview,
        bw.final_score::FLOAT8                                                        AS score,
        bw.cum_chars::INT                                                             AS tokens_consumed,
        -- navigation_path: jsonb_gate if filter was applied, else dominant retrieval signal
        CASE
            WHEN jsonb_filter IS NOT NULL THEN 'jsonb_gate'
            WHEN bw.vec_rank <= bw.bm25_rank_eff THEN 'vector'
            ELSE 'bm25'
        END                                                                           AS navigation_path
    FROM budget_window bw
    JOIN pgmnemo.agent_lesson al ON al.id = bw.id
    WHERE bw.rn = 1                                    -- always return first row
       OR (bw.cum_chars - bw.text_len) < token_budget_chars   -- include while under budget
    ORDER BY bw.final_score DESC, bw.id ASC;
END;
$$;

COMMENT ON FUNCTION pgmnemo.navigate_locate(vector, TEXT, INT, JSONB, INT) IS
    'Token-economy navigation LOCATE (v0.9.1). '
    'Ranks lessons using the same hybrid RRF+aux+graph formula as recall_hybrid. '
    'v0.9.1 fixes: (1) graph_walk now traverses ALL relation_types bidirectionally '
    '(was edge_kind IN causal/temporal — missed production edges with edge_kind=semantic); '
    '(2) raw_candidates uses stored topic_tsv column (GIN-indexed) instead of inline '
    'to_tsvector recompute — eliminates O(n) seq-scan on topic for BM25 match. '
    'Returns id/preview/score/tokens_consumed/navigation_path — preview is first ~50 chars. '
    'token_budget_chars: cumulative Unicode character (code-point) limit on delivered previews; '
    'first row always returned regardless of budget. '
    'jsonb_filter: WHERE metadata @> jsonb_filter pushed into candidate scan (uses GIN index). '
    'project_id_filter: scopes candidates to a single project (uses B-tree index). '
    'navigation_path: ''jsonb_gate'' when filter applied; ''vector'' when vec dominant; ''bm25'' otherwise. '
    'Combine with navigate_expand() to retrieve content for chosen IDs.';