--complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pgwaffles" to load this file. \quit -- Create the schema create schema pgwaffles; -- Create the tables for the extension -- Data model: -- INGREDIENT: name -- NEEDS, 0N INGREDIENT, 0N RECIPE: quantity, unit, yield -- RECIPE: title, picture, description, keywords, ranking -- HAS INSTRUCTIONS, 0N STEP, _11 RECIPE -- STEP: ordinality, description -- -- http://mocodo.wingi.net/ create table pgwaffles.ingredient ( id integer generated always as identity primary key, name text unique not null ); create table pgwaffles.recipe ( id integer generated always as identity primary key, title text unique not null, picture text, description text, keywords text[] not null, rating float ); create table pgwaffles.step ( recipeId integer not null references pgwaffles.recipe (id), ordinality integer not null, description text not null, primary key (recipeId, ordinality) ); create table pgwaffles.ingredientInRecipe ( recipeId integer not null references pgwaffles.recipe (id), yield integer not null, ingredientId integer not null references pgwaffles.ingredient (id), quantity decimal(5,3) not null, unit text not null ); -- Read the search results from allrecipes.com -- sort=re means we will sort by descending rating create temporary table allRecipes (html text); copy allRecipes from program 'curl -s "https://www.allrecipes.com/search/results/?wt=belgium%20waffles&sort=re" | hxnormalize -x | hxselect -i "section.recipe-section > article.fixed-recipe-card > div.fixed-recipe-card__info > a" | tr "\n" " "'; -- Create the temporary table to store the best recipe create temporary table processedRecipe( data text, recipe jsonb, title text generated always as ((recipe -> 'name')::text) stored, image text generated always as ( ((recipe -> 'image')::jsonb -> 'url')::text ) stored, description text generated always as ( (recipe -> 'description')::text ) stored, yield integer generated always as ( regexp_replace(( regexp_split_to_array((recipe -> 'recipeYield')::text,' '))[1], '"', '')::integer ) stored, ingredients jsonb generated always as (recipe -> 'recipeIngredient') stored, steps jsonb generated always as (recipe -> 'recipeInstructions') stored, keywords text[], rating double precision generated always as ( ((recipe -> 'aggregateRating')::jsonb -> 'ratingValue')::float ) stored ); do $$declare mylink text; _stmt text; begin -- Forge the copy query to get the html code source of the recipe with rawHtml(parts) as ( -- Let's get the url links at the begining select regexp_split_to_table(html, 'href="') from allRecipes ), myRecipes(url) as ( -- Let's stop at teh end of the link's url select split_part(parts, '/">', 1) from rawHtml -- The first part does not contain a real link where rawHtml.parts !~ '^