"""Build a semantic-similarity example selector over the topics in data/topics.json.

Importing this module loads the topics file, embeds every topic with Google
Generative AI embeddings, and exposes the resulting ``example_selector``.
Running it as a script additionally performs one demonstration query and
prints the elapsed time.
"""

import json
import os
from datetime import datetime

from dotenv import load_dotenv

# Called before the remaining imports, exactly as in the original ordering,
# so the values from .env are already in the environment at that point.
load_dotenv()

from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import (
    SemanticSimilarityExampleSelector,
)
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load topics from the data file (path is relative to the working directory).
with open(file="data/topics.json", encoding="utf-8") as f:
    data = json.load(f)


def _as_topic(ex):
    """Return the topic value carried by a single raw example."""
    if isinstance(ex, str):
        return ex
    if isinstance(ex, dict) and "topic" in ex:
        return ex["topic"]
    # Anything else (numbers, lists, dicts without a "topic" key) is
    # stringified rather than dropped.
    return str(ex)


def format_examples(examples):
    """Normalize raw examples into a list of ``{"topic": ...}`` dicts.

    Args:
        examples: Iterable of raw examples. Each item may be a plain string,
            a dict containing a ``"topic"`` key, or any other object.

    Returns:
        A new list with one ``{"topic": value}`` dict per input item, in
        input order. Strings are used as-is, dicts contribute only their
        ``"topic"`` value (other keys are discarded), and every other object
        is converted with ``str()``.
    """
    return [{"topic": _as_topic(ex)} for ex in examples]


# topics.json should have a top-level "topics" key. ``or []`` also covers an
# explicit ``"topics": null`` so that case reaches the check below instead of
# failing while iterating None.
examples = data.get("topics") or []
formatted_examples = format_examples(examples)

if not formatted_examples:
    # Fail early with an actionable message: an empty example list cannot be
    # turned into a vector store and would otherwise surface as an obscure
    # error from inside the index construction.
    raise ValueError(
        'data/topics.json must contain a non-empty top-level "topics" list'
    )

# Taken before the selector is built so the reported time includes embedding
# the examples and constructing the index.
start_time = datetime.now()

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=formatted_examples,
    embeddings=GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        api_key=os.getenv("GOOGLE_API_KEY"),
    ),
    vectorstore_cls=FAISS,
    k=1,  # return only the single closest example
    input_keys=["topic"],
)

if __name__ == "__main__":
    # Example call to the selector (for demonstration only). Guarded so that
    # importing this module for ``example_selector`` does not trigger an
    # extra embedding request or print to stdout.
    result = example_selector.select_examples(
        {
            "topic": "people who are terminally ill and suffering greatly should "
            "have the right to end their own life if they so desire."
        }
    )
    print(result)

    end_time = datetime.now()
    print(f"Time taken: {(end_time - start_time).total_seconds()} seconds")