"""
Populate the Supabase vector store with GAIA benchmark embeddings.

Prerequisites:
- SUPABASE_URL and SUPABASE_SERVICE_KEY in .env
- pgvector extension enabled in Supabase
- gaia_documents table created with columns:
  content (text), metadata (jsonb), embedding (vector)

The script reads the JSONL file named by the "data" config entry, embeds the
"Question" field of every record, and inserts one row per record into the
"gaia_documents" table. It exits with a non-zero status if the credentials
are missing or the insert fails.
"""

import json
import os
import sys

from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from supabase.client import Client, create_client

from gaia.utils import load_config

load_dotenv()
config = load_config()
data_path = config["data"]

supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
# Fail early with an actionable message instead of handing None to the client.
if not supabase_url or not supabase_key:
    sys.exit(
        "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set "
        "(for example in .env) before running this script."
    )

supabase: Client = create_client(supabase_url, supabase_key)

embeddings = SentenceTransformer(
    model_name_or_path=config["vector_store"]["embedding_model_name"],
    cache_folder=config["models"]["cache_folder"],
)

# Explicit UTF-8 keeps decoding independent of the platform's locale; blank
# lines (such as a trailing newline at end of file) are skipped because
# json.loads would reject them.
with open(data_path, "r", encoding="utf-8") as jsonl_file:
    records = [json.loads(line) for line in jsonl_file if line.strip()]

questions = [record["Question"] for record in records]

# Encode all questions in one call so the model can batch them, rather than
# invoking it once per record. Skipped entirely when there is nothing to embed.
vectors = (
    embeddings.encode(questions, normalize_embeddings=True).tolist()
    if questions
    else []
)

documents = [
    {
        "content": question,
        "metadata": {
            "source": "vector_search",
            "task_id": record["task_id"],
        },
        "embedding": vector,
    }
    for record, question, vector in zip(records, questions, vectors)
]

if not documents:
    print(f"No documents found in {data_path}; nothing to insert.")
else:
    print(f"Inserting {len(documents)} documents into Supabase...")
    try:
        supabase.table("gaia_documents").insert(documents).execute()
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status so callers (shell scripts, CI) can detect it.
        print("Error inserting data into Supabase:", e, file=sys.stderr)
        sys.exit(1)
    else:
        print("Done.")