# FastAPI-Backend-Models/topic_similarity_langchain_example.py
import json
import os
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import (
SemanticSimilarityExampleSelector,
)
from langchain_google_genai import GoogleGenerativeAIEmbeddings
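# Assumed dependencies (not pinned in the original):
#   pip install python-dotenv langchain-community langchain-google-genai faiss-cpu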
# Load topics from data file
with open(
    file="data/topics.json",
    encoding="utf-8"
) as f:
    data = json.load(f)
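# Assumed shape of data/topics.json (illustrative; entries may be plain strings or dicts):
#   {"topics": ["euthanasia", {"topic": "climate change"}, ...]}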
# Make sure each example is a dict with "topic" key (wrap as dict if plain string)
def format_examples(examples):
    formatted = []
    for ex in examples:
        if isinstance(ex, str):
            formatted.append({"topic": ex})
        elif isinstance(ex, dict) and "topic" in ex:
            formatted.append({"topic": ex["topic"]})
        else:
            formatted.append({"topic": str(ex)})
    return formatted
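# e.g. format_examples(["euthanasia", {"topic": "climate change"}])
#      -> [{"topic": "euthanasia"}, {"topic": "climate change"}]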
# topics.json should have a top-level "topics" key
examples = data.get("topics", [])
formatted_examples = format_examples(examples)
start_time = datetime.now()
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=formatted_examples,
    embeddings=GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        api_key=os.getenv("GOOGLE_API_KEY"),
    ),
    vectorstore_cls=FAISS,
    k=1,
    input_keys=["topic"],
)
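# from_examples() embeds every formatted topic and builds an in-memory FAISS
# index; with k=1 the selector returns only the single closest stored topic.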
# Example call to selector (for demonstration; remove in production)
result = example_selector.select_examples(
    {"topic": "people who are terminally ill and suffering greatly should have the right to end their own life if they so desire."}
)
print(result)
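# Expected output shape (the actual topic depends on the contents of topics.json):
#   [{"topic": "<stored topic most similar to the query>"}]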
end_time = datetime.now()
print(f"Time taken: {(end_time - start_time).total_seconds()} seconds")