-- psql transcript exercising the pg_gembed embedding functions.
-- Every statement is followed by the result it is expected to print.
-- NOTE(review): image_path / image_directory_path are absolute paths under a
-- home directory; presumably they must be adjusted on other machines — confirm.
\set ON_ERROR_STOP on
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION pg_gembed;
\set embedder 'embed_anything'
\set text_model 'sentence-transformers/all-MiniLM-L6-v2'
\set image_model 'openai/clip-vit-base-patch32'
\set image_path '/home/joel/dev/pg_gembed/image/Geralt.jpg'
\set image_directory_path '/home/joel/dev/pg_gembed/image'
-- =============================================================================
-- Text embedding functions
-- =============================================================================
-- embed_text: single text → vector
SELECT
    vector_dims(embed_text(:'embedder', :'text_model', 'Hello world')) > 0
        AS embed_text_returns_nonempty_vector;
 embed_text_returns_nonempty_vector 
------------------------------------
 t
(1 row)

-- embed_texts: text[] → vector[]
SELECT
    array_length(
        embed_texts(:'embedder', :'text_model', ARRAY['Hello', 'World']),
        1
    ) = 2 AS embed_texts_returns_two_vectors;
 embed_texts_returns_two_vectors 
---------------------------------
 t
(1 row)

-- embed_texts: all dimensions must match
SELECT
    (
        SELECT COUNT(DISTINCT vector_dims(e))
        FROM unnest(
            embed_texts(:'embedder', :'text_model', ARRAY['foo', 'bar', 'baz'])
        ) AS e
    ) = 1 AS embed_texts_consistent_dims;
 embed_texts_consistent_dims 
-----------------------------
 t
(1 row)

-- embed_texts_with_ids: SETOF (id, embedding)
SELECT
    id,
    vector_dims(embedding) > 0 AS has_embedding
FROM embed_texts_with_ids(
    :'embedder', :'text_model', ARRAY[1, 2], ARRAY['Hello', 'World']
)
ORDER BY id;
 id | has_embedding 
----+---------------
  1 | t
  2 | t
(2 rows)

-- =============================================================================
-- Image embedding functions
-- =============================================================================
-- embed_image: single bytea → vector
SELECT
    vector_dims(
        embed_image(
            :'embedder', :'image_model', pg_read_binary_file(:'image_path')
        )
    ) > 0 AS embed_image_returns_nonempty_vector;
 embed_image_returns_nonempty_vector 
-------------------------------------
 t
(1 row)

-- embed_images: bytea[] → vector[]
SELECT
    array_length(
        embed_images(
            :'embedder', :'image_model',
            ARRAY[pg_read_binary_file(:'image_path')]
        ),
        1
    ) = 1 AS embed_images_returns_one_vector;
 embed_images_returns_one_vector 
---------------------------------
 t
(1 row)

-- embed_images_with_ids: SETOF (id, embedding)
SELECT
    id,
    vector_dims(embedding) > 0 AS has_embedding
FROM embed_images_with_ids(
    :'embedder', :'image_model',
    ARRAY[1], ARRAY[pg_read_binary_file(:'image_path')]
)
ORDER BY id;
 id | has_embedding 
----+---------------
  1 | t
(1 row)

-- embed_image_directory: text → vector[]
SELECT
    array_length(
        embed_image_directory(
            :'embedder', :'image_model', :'image_directory_path'
        ),
        1
    ) > 0 AS embed_image_directory_returns_vectors;
 embed_image_directory_returns_vectors 
---------------------------------------
 t
(1 row)

-- embed_image_directories: text[] → vector[]
SELECT
    array_length(
        embed_image_directories(
            :'embedder', :'image_model', ARRAY[:'image_directory_path']
        ),
        1
    ) > 0 AS embed_image_directories_returns_vectors;
 embed_image_directories_returns_vectors 
-----------------------------------------
 t
(1 row)

-- =============================================================================
-- Multimodal embedding function
-- =============================================================================
-- embed_multimodal: images + texts → vector[]
SELECT
    array_length(
        embed_multimodal(
            :'embedder', :'image_model',
            ARRAY[pg_read_binary_file(:'image_path')],
            ARRAY['A cool image']
        ),
        1
    ) > 0 AS embed_multimodal_returns_vectors;
 embed_multimodal_returns_vectors 
----------------------------------
 t
(1 row)

-- embed_multimodal: texts only
SELECT
    array_length(
        embed_multimodal(
            :'embedder', :'image_model',
            NULL,
            ARRAY['Only text']
        ),
        1
    ) > 0 AS embed_multimodal_text_only;
 embed_multimodal_text_only 
----------------------------
 t
(1 row)

-- embed_multimodal: images only
SELECT
    array_length(
        embed_multimodal(
            :'embedder', :'image_model',
            ARRAY[pg_read_binary_file(:'image_path')],
            NULL
        ),
        1
    ) > 0 AS embed_multimodal_image_only;
 embed_multimodal_image_only 
-----------------------------
 t
(1 row)

-- =============================================================================
-- Polymorphic embed() dispatcher — scalar overload (anyelement → vector)
-- =============================================================================
-- dispatch: text input
SELECT
    vector_dims(
        embed(
            :'embedder', :'text_model',
            'Hello world'::text, 'text'::input_type
        )
    ) > 0 AS embed_dispatch_text;
 embed_dispatch_text 
---------------------
 t
(1 row)

-- dispatch: image input
SELECT
    vector_dims(
        embed(
            :'embedder', :'image_model',
            pg_read_binary_file(:'image_path'), 'image'::input_type
        )
    ) > 0 AS embed_dispatch_image;
 embed_dispatch_image 
----------------------
 t
(1 row)

-- dispatch: image_directory returns the first vector from the directory
SELECT
    vector_dims(
        embed(
            :'embedder', :'image_model',
            :'image_directory_path'::text, 'image_directory'::input_type
        )
    ) > 0 AS embed_dispatch_image_directory;
 embed_dispatch_image_directory 
--------------------------------
 t
(1 row)

-- =============================================================================
-- Polymorphic embed() dispatcher — array overload (anyarray → vector[])
-- =============================================================================
-- dispatch array: text[]
SELECT
    array_length(
        embed(
            :'embedder', :'text_model',
            ARRAY['Hello', 'World'], 'text'::input_type
        ),
        1
    ) = 2 AS embed_dispatch_array_text;
 embed_dispatch_array_text 
---------------------------
 t
(1 row)

-- dispatch array: consistent dimensions across the batch
SELECT
    (
        SELECT COUNT(DISTINCT vector_dims(e))
        FROM unnest(
            embed(
                :'embedder', :'text_model',
                ARRAY['alpha', 'beta', 'gamma'], 'text'::input_type
            )
        ) AS e
    ) = 1 AS embed_dispatch_array_text_consistent_dims;
 embed_dispatch_array_text_consistent_dims 
-------------------------------------------
 t
(1 row)

-- dispatch array: bytea[]
SELECT
    array_length(
        embed(
            :'embedder', :'image_model',
            ARRAY[pg_read_binary_file(:'image_path')], 'image'::input_type
        ),
        1
    ) = 1 AS embed_dispatch_array_image;
 embed_dispatch_array_image 
----------------------------
 t
(1 row)

-- dispatch array: text[] paths (image_directory)
SELECT
    array_length(
        embed(
            :'embedder', :'image_model',
            ARRAY[:'image_directory_path'], 'image_directory'::input_type
        ),
        1
    ) > 0 AS embed_dispatch_array_image_directory;
 embed_dispatch_array_image_directory 
--------------------------------------
 t
(1 row)

-- dispatch array: empty array → NULL
SELECT
    embed(
        :'embedder', :'text_model',
        ARRAY[]::text[], 'text'::input_type
    ) IS NULL AS embed_dispatch_array_empty_returns_null;
 embed_dispatch_array_empty_returns_null 
-----------------------------------------
 t
(1 row)