/*------------------------------------------------------------------------- * * parse_name.c * * parse function signature * parse identifier, and type name * * by Pavel Stehule 2013-2021 * *------------------------------------------------------------------------- */ #include "plpgsql_check.h" #include "catalog/namespace.h" #include "parser/scansup.h" #include "parser/parse_type.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #if PG_VERSION_NUM < 110000 #include "utils/typcache.h" #endif /* * Is character a valid identifier start? * Must match scan.l's {ident_start} character class. */ static bool is_ident_start(unsigned char c) { /* Underscores and ASCII letters are OK */ if (c == '_') return true; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) return true; /* Any high-bit-set character is OK (might be part of a multibyte char) */ if (IS_HIGHBIT_SET(c)) return true; return false; } /* * Is character a valid identifier continuation? * Must match scan.l's {ident_cont} character class. */ static bool is_ident_cont(unsigned char c) { /* Can be digit or dollar sign ... */ if ((c >= '0' && c <= '9') || c == '$') return true; /* ... or an identifier start character */ return is_ident_start(c); } /* * parse_ident - returns list of Strings when input is valid name. * Returns NIL, when input string is signature. Can raise a error, * when input is not valid identifier. */ static List * parse_name_or_signature(char *qualname, bool *is_signature) { char *nextp; char *rawname; bool after_dot = false; List *result = NIL; /* We need a modifiable copy of the input string. */ rawname = pstrdup(qualname); /* * The code below scribbles on qualname_str in some cases, so we should * reconvert qualname if we need to show the original string in error * messages. */ nextp = rawname; /* skip leading whitespace */ while (scanner_isspace(*nextp)) nextp++; for (;;) { char *curname; bool missing_ident = true; if (*nextp == '"') { char *endp; curname = nextp + 1; for (;;) { endp = strchr(nextp + 1, '"'); if (endp == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname), errdetail("String has unclosed double quotes."))); if (endp[1] != '"') break; memmove(endp, endp + 1, strlen(endp)); nextp = endp; } nextp = endp + 1; *endp = '\0'; if (endp - curname == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname), errdetail("Quoted identifier must not be empty."))); truncate_identifier(curname, strlen(curname), true); result = lappend(result, makeString(curname)); missing_ident = false; } else if (is_ident_start((unsigned char) *nextp)) { char *downname; int len; curname = nextp++; while (is_ident_cont((unsigned char) *nextp)) nextp++; len = nextp - curname; /* * We don't implicitly truncate identifiers. This is useful for * allowing the user to check for specific parts of the identifier * being too long. It's easy enough for the user to get the * truncated names by casting our output to name[]. */ downname = downcase_truncate_identifier(curname, len, false); result = lappend(result, makeString(downname)); missing_ident = false; } if (missing_ident) { /* Different error messages based on where we failed. */ if (*nextp == '.') ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname), errdetail("No valid identifier before \".\"."))); else if (after_dot) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname), errdetail("No valid identifier after \".\"."))); else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname))); } while (scanner_isspace(*nextp)) nextp++; if (*nextp == '.') { after_dot = true; nextp++; while (scanner_isspace(*nextp)) nextp++; } else if (*nextp == '\0') { break; } else if (*nextp == '(') { *is_signature = true; return NIL; } else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("string is not a valid identifier: \"%s\"", qualname))); } *is_signature = false; return result; } /* * Returns Oid of function specified by name or by signature * */ Oid plpgsql_check_parse_name_or_signature(char *name_or_signature) { List *names; bool is_signature; names = parse_name_or_signature(name_or_signature, &is_signature); if (!is_signature) { FuncCandidateList clist; clist = FuncnameGetCandidates(names, -1, NIL, false, false, #if PG_VERSION_NUM >= 140000 false, #endif true); if (clist == NULL) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", name_or_signature))); else if (clist->next != NULL) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", name_or_signature))); return clist->oid; } return DatumGetObjectId(DirectFunctionCall1(regprocedurein, CStringGetDatum(name_or_signature))); } #define PRAGMA_TOKEN_IDENTIF 128 #define PRAGMA_TOKEN_QIDENTIF 129 #define PRAGMA_TOKEN_NUMBER 130 /* * Originaly this structure was named TokenType, but this is in collision * with name from NT SDK. So it is renamed to PragmaTokenType. */ typedef struct { int value; const char *substr; size_t size; } PragmaTokenType; typedef struct { const char *str; PragmaTokenType saved_token; bool saved_token_is_valid; } TokenizerState; /* * Tokenize text. Only one error is possible here - unclosed double quotes. * Returns NULL, when found EOL or on error. */ static PragmaTokenType * get_token(TokenizerState *state, PragmaTokenType *token) { if (state->saved_token_is_valid) { state->saved_token_is_valid = false; return &state->saved_token; } /* skip inital spaces */ while (*state->str == ' ') state->str++; if (!*state->str) return NULL; if (isdigit(*state->str)) { bool have_dot = false; token->value = PRAGMA_TOKEN_NUMBER; token->substr = state->str++; while (isdigit(*state->str) || (*state->str == '.')) { if (*state->str == '.') { if (!have_dot) have_dot = true; else break; } state->str += 1; } } else if (*state->str == '"') { bool is_error = true; token->value = PRAGMA_TOKEN_QIDENTIF; token->substr = state->str++; is_error = true; while (*state->str) { if (*state->str == '"') { state->str += 1; if (*state->str != '"') { is_error = false; break; } } state->str += 1; } if (is_error) elog(ERROR, "Syntax error (unclosed quoted identifier)"); } else if (is_ident_start(*state->str)) { token->value = PRAGMA_TOKEN_IDENTIF; token->substr = state->str++; while (is_ident_cont(*state->str)) state->str += 1; } else token->value = *state->str++; token->size = state->str - token->substr; return token; } static void unget_token(TokenizerState *state, PragmaTokenType *token) { if (token) { state->saved_token.value = token->value; state->saved_token.substr = token->substr; state->saved_token.size = token->size; state->saved_token_is_valid = true; } else state->saved_token_is_valid = false; } static bool token_is_keyword(PragmaTokenType *token, const char *str) { if (!token) return false; if (token->value == PRAGMA_TOKEN_IDENTIF && token->size == strlen(str) && strncasecmp(token->substr, str, token->size) == 0) return true; return false; } /* * Returns true if all tokens was read */ static bool tokenizer_eol(TokenizerState *state) { if (state->saved_token_is_valid) return false; while (*state->str) { if (!isspace(*state->str)) return false; state->str += 1; } return true; } static void initialize_tokenizer(TokenizerState *state, const char *str) { state->str = str; state->saved_token_is_valid = false; } static char * make_ident(PragmaTokenType *token) { if (token->value == PRAGMA_TOKEN_IDENTIF) { return downcase_truncate_identifier(token->substr, token->size, false); } else if (token->value == PRAGMA_TOKEN_QIDENTIF) { char *result = palloc(token->size); const char *ptr = token->substr + 1; char *write_ptr; int n = token->size - 2; write_ptr = result; while (n-- > 0) { *write_ptr++ = *ptr; if (*ptr++ == '"') { ptr += 1; n -= 1; } } *write_ptr = '\0'; truncate_identifier(result, write_ptr - result, false); return result; } return NULL; } /* * Returns list of strings used in qualified identifiers */ static List * get_qualified_identifier(TokenizerState *state, List *result) { PragmaTokenType token, *_token; bool read_atleast_one = false; while (1) { _token = get_token(state, &token); if (!_token) break; if (_token->value != PRAGMA_TOKEN_IDENTIF && _token->value != PRAGMA_TOKEN_QIDENTIF) elog(ERROR, "Syntax error (expected identifier)"); result = lappend(result, make_ident(_token)); read_atleast_one = true; _token = get_token(state, &token); if (!_token) break; if (_token->value != '.') { unget_token(state, _token); break; } } if (!read_atleast_one) elog(ERROR, "Syntax error (expected identifier)"); return result; } /* * Set start position and length of qualified identifier. Returns true, * if parsed identifier is valid. */ static void parse_qualified_identifier(TokenizerState *state, const char **startptr, int *size) { PragmaTokenType token, *_token; bool read_atleast_one = false; const char *_startptr = *startptr; int _size = *size; while (1) { _token = get_token(state, &token); if (!_token) break; if (_token->value != PRAGMA_TOKEN_IDENTIF && _token->value != PRAGMA_TOKEN_QIDENTIF) elog(ERROR, "Syntax error (expected identifier)"); if (!_startptr) { _startptr = _token->substr; _size = _token->size; } else _size = _token->substr - _startptr + _token->size; read_atleast_one = true; _token = get_token(state, &token); if (!_token) break; if (_token->value != '.') { unget_token(state, _token); break; } } if (!read_atleast_one) elog(ERROR, "Syntax error (expected identifier)"); *startptr = _startptr; *size = _size; } /* * When rectype is not allowed, then composite type is allowed only * on top level. */ static Oid get_type_internal(TokenizerState *state, int32 *typmod, bool allow_rectype, bool istop) { PragmaTokenType token, *_token; const char *typename_start = NULL; int typename_length = 0; const char *typestr; TypeName *typeName = NULL; Oid typtype; _token = get_token(state, &token); if (!_token) elog(ERROR, "Syntax error (expected identifier)"); if (_token->value == '(') { TupleDesc resultTupleDesc; List *names = NIL; List *types = NIL; List *typmods = NIL; List *collations = NIL; if (!allow_rectype && !istop) elog(ERROR, "Cannot to create table with pseudo-type record."); _token = get_token(state, &token); if (token_is_keyword(_token, "like")) { typtype = get_type_internal(state, typmod, allow_rectype, false); if (!type_is_rowtype(typtype)) elog(ERROR, "\"%s\" is not composite type", format_type_be(typtype)); _token = get_token(state, &token); if (!_token || _token->value != ')') elog(ERROR, "Syntax error (expected \")\")"); return typtype; } else unget_token(state, _token); while (1) { Oid _typtype; int32 _typmod; _token = get_token(state, &token); if (!_token || (_token->value != PRAGMA_TOKEN_IDENTIF && _token->value != PRAGMA_TOKEN_QIDENTIF)) elog(ERROR, "Syntax error (expected identifier)"); names = lappend(names, makeString(make_ident(_token))); _typtype = get_type_internal(state, &_typmod, allow_rectype, false); types = lappend_oid(types, _typtype); typmods = lappend_int(typmods, _typmod); collations = lappend_oid(collations, InvalidOid); _token = get_token(state, &token); if (!_token) elog(ERROR, "Syntax error (unclosed composite type definition - expected \")\")"); if (_token->value == ')') { break; } else if (_token->value != ',') elog(ERROR, "Syntax error (expected \",\")"); } resultTupleDesc = BuildDescFromLists(names, types, typmods, collations); resultTupleDesc = BlessTupleDesc(resultTupleDesc); *typmod = resultTupleDesc->tdtypmod; return resultTupleDesc->tdtypeid; } else if (_token->value == PRAGMA_TOKEN_QIDENTIF) { unget_token(state, _token); parse_qualified_identifier(state, &typename_start, &typename_length); } else if (_token->value == PRAGMA_TOKEN_IDENTIF) { PragmaTokenType token2, *_token2; _token2 = get_token(state, &token2); if (_token2) { if (_token2->value == '.') { typename_start = _token->substr; typename_length = _token->size; parse_qualified_identifier(state, &typename_start, &typename_length); } else { /* multi word type name */ typename_start = _token->substr; typename_length = _token->size; while (_token2 && _token2->value == PRAGMA_TOKEN_IDENTIF) { typename_length = _token2->substr + _token2->size - typename_start; _token2 = get_token(state, &token2); } unget_token(state, _token2); } } else { typename_start = _token->substr; typename_length = _token->size; } } else elog(ERROR, "Syntax error (expected identifier)"); /* get typmod */ _token = get_token(state, &token); if (_token) { if (_token->value == '(') { while (1) { _token = get_token(state, &token); if (!_token || _token->value != PRAGMA_TOKEN_NUMBER) elog(ERROR, "Syntax error (expected number for typmod specification)"); _token = get_token(state, &token); if (!_token) elog(ERROR, "Syntax error (unclosed typmod specification)"); if (_token->value == ')') { break; } else if (_token->value != ',') elog(ERROR, "Syntax error (expected \",\" in typmod list)"); } typename_length = _token->substr + _token->size - typename_start; } else unget_token(state, _token); } /* get array symbols */ _token = get_token(state, &token); if (_token) { if (_token->value == '[') { _token = get_token(state, &token); if (_token && _token->value == PRAGMA_TOKEN_NUMBER) _token = get_token(state, &token); if (!_token) elog(ERROR, "Syntax error (unclosed array specification)"); if (_token->value != ']') elog(ERROR, "Syntax error (expected \"]\")"); typename_length = _token->substr + _token->size - typename_start; } else unget_token(state, _token); } typestr = pnstrdup(typename_start, typename_length); typeName = typeStringToTypeName(typestr); typenameTypeIdAndMod(NULL, typeName, &typtype, typmod); return typtype; } static Oid get_type(TokenizerState *state, int32 *typmod, bool allow_rectype) { return get_type_internal(state, typmod, allow_rectype, true); } static int get_varno(PLpgSQL_nsitem *cur_ns, List *names) { char *name1 = NULL; char *name2 = NULL; char *name3 = NULL; int names_used; PLpgSQL_nsitem *nsitem; switch (list_length(names)) { case 1: { name1 = (char *) linitial(names); break; } case 2: { name1 = (char *) linitial(names); name2 = (char *) lsecond(names); break; } case 3: { name1 = (char *) linitial(names); name2 = (char *) lsecond(names); name3 = (char *) lthird(names); break; } default: return -1; } nsitem = plpgsql_check__ns_lookup_p(cur_ns, false, name1, name2, name3, &names_used); return nsitem ? nsitem->itemno : -1; } static char * get_name(List *names) { bool is_first = true; ListCell *lc; StringInfoData sinfo; initStringInfo(&sinfo); foreach(lc, names) { if (is_first) is_first = false; else appendStringInfoChar(&sinfo, '.'); appendStringInfo(&sinfo, "\"%s\"", (char *) lfirst(lc)); } return sinfo.data; } bool plpgsql_check_pragma_type(PLpgSQL_checkstate *cstate, const char *str, PLpgSQL_nsitem *ns, int lineno) { MemoryContext oldCxt; ResourceOwner oldowner; volatile bool result = true; /* * namespace is available only in compile check mode, and only in this mode * this pragma can be used. */ if (!ns || !cstate) return true; oldCxt = CurrentMemoryContext; oldowner = CurrentResourceOwner; BeginInternalSubTransaction(NULL); MemoryContextSwitchTo(cstate->check_cxt); PG_TRY(); { TokenizerState tstate; int target_dno; PLpgSQL_datum *target; List *names; Oid typtype; int32 typmod; TupleDesc typtupdesc; initialize_tokenizer(&tstate, str); names = get_qualified_identifier(&tstate, NULL); if ((target_dno = get_varno(ns, names)) == -1) elog(ERROR, "Cannot to find variable \"%s\" used in settype pragma", get_name(names)); target = cstate->estate->datums[target_dno]; if (target->dtype != PLPGSQL_DTYPE_REC) elog(ERROR, "Pragma \"settype\" can be applied only on variable of record type"); typtype = get_type(&tstate, &typmod, true); if (!tokenizer_eol(&tstate)) elog(ERROR, "Syntax error (unexpected chars after type specification)"); typtupdesc = lookup_rowtype_tupdesc_copy(typtype, typmod); plpgsql_check_assign_tupdesc_dno(cstate, target_dno, typtupdesc, false); cstate->typed_variables = bms_add_member(cstate->typed_variables, target_dno); RollbackAndReleaseCurrentSubTransaction(); MemoryContextSwitchTo(oldCxt); CurrentResourceOwner = oldowner; SPI_restore_connection(); } PG_CATCH(); { ErrorData *edata; MemoryContextSwitchTo(cstate->check_cxt); edata = CopyErrorData(); FlushErrorState(); MemoryContextSwitchTo(oldCxt); FlushErrorState(); RollbackAndReleaseCurrentSubTransaction(); MemoryContextSwitchTo(oldCxt); CurrentResourceOwner = oldowner; /* reconnect spi */ SPI_restore_connection(); /* raise warning (errors in pragma can be ignored instead */ ereport(WARNING, (errmsg("Pragma \"type\" on line %d is not processed.", lineno), errdetail("%s", edata->message))); result = false; } PG_END_TRY(); return result; } /* * Unfortunately the ephemeral tables introduced in PostgreSQL 10 cannot be * used for this purpose, because any DML operations are prohibited, and others * DML catalogue operations doesn't calculate with Ephemeral space. */ bool plpgsql_check_pragma_table(PLpgSQL_checkstate *cstate, const char *str, int lineno) { MemoryContext oldCxt; ResourceOwner oldowner; volatile bool result = true; if (!cstate) return true; oldCxt = CurrentMemoryContext; oldowner = CurrentResourceOwner; BeginInternalSubTransaction(NULL); MemoryContextSwitchTo(cstate->check_cxt); PG_TRY(); { TokenizerState tstate; PragmaTokenType token, *_token; StringInfoData query; int32 typmod; initialize_tokenizer(&tstate, str); _token = get_token(&tstate, &token); if (!_token || (_token->value != PRAGMA_TOKEN_IDENTIF && _token->value != PRAGMA_TOKEN_QIDENTIF)) elog(ERROR, "Syntax error (expected identifier)"); _token = get_token(&tstate, &token); if (!_token || _token->value != '(') elog(ERROR, "Syntax error (expected table specification)"); unget_token(&tstate, _token); (void) get_type(&tstate, &typmod, false); if (!tokenizer_eol(&tstate)) elog(ERROR, "Syntax error (unexpected chars after table specification)"); /* In this case we use parser just for syntax check and security check */ initStringInfo(&query); appendStringInfoString(&query, "CREATE TEMP TABLE "); appendStringInfoString(&query, str); if (SPI_execute(query.data, false, 0) != SPI_OK_UTILITY) elog(NOTICE, "Cannot to create temporary table"); ReleaseCurrentSubTransaction(); MemoryContextSwitchTo(oldCxt); CurrentResourceOwner = oldowner; SPI_restore_connection(); } PG_CATCH(); { ErrorData *edata; MemoryContextSwitchTo(cstate->check_cxt); edata = CopyErrorData(); FlushErrorState(); MemoryContextSwitchTo(oldCxt); FlushErrorState(); RollbackAndReleaseCurrentSubTransaction(); MemoryContextSwitchTo(oldCxt); CurrentResourceOwner = oldowner; /* reconnect spi */ SPI_restore_connection(); /* raise warning (errors in pragma can be ignored instead */ ereport(WARNING, (errmsg("Pragma \"table\" on line %d is not processed.", lineno), errdetail("%s", edata->message))); result = false; } PG_END_TRY(); return result; }