|
|
import json |
|
|
import os |
|
|
from datetime import datetime |
|
|
from dotenv import load_dotenv |
|
|
load_dotenv() |
|
|
|
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_core.example_selectors import ( |
|
|
SemanticSimilarityExampleSelector, |
|
|
) |
|
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings |
|
|
|
|
|
|
|
|
# Read the topic corpus from disk. The path is relative to the current working
# directory, and the parsed JSON is kept in the module-level name ``data``.
with open("data/topics.json", mode="r", encoding="utf-8") as topics_file:
    data = json.loads(topics_file.read())
|
|
|
|
|
|
|
|
def format_examples(examples):
    """Normalize raw topic entries into ``{"topic": ...}`` dictionaries.

    Args:
        examples: Iterable of raw entries, or ``None`` (treated as empty).
            Each entry may be a string, a dict containing a ``"topic"`` key,
            or any other object.

    Returns:
        A list with one ``{"topic": value}`` dict per entry, in input order.
        Strings are used as-is, dicts contribute their ``"topic"`` value
        unchanged (any other keys are dropped), and everything else is
        converted with ``str()``.
    """
    if examples is None:
        # A missing/null collection means "no examples", not a TypeError.
        return []

    def _topic_of(entry):
        # Strings and dicts carrying "topic" supply the value directly; other
        # objects (including dicts without "topic") fall back to str().
        if isinstance(entry, str):
            return entry
        if isinstance(entry, dict) and "topic" in entry:
            return entry["topic"]
        return str(entry)

    return [{"topic": _topic_of(entry)} for entry in examples]
|
|
|
|
|
|
|
|
# Pull the raw topic entries out of the loaded JSON. ``or []`` also covers an
# explicit ``"topics": null`` so the normalization step always receives a list.
examples = data.get("topics") or []
formatted_examples = format_examples(examples)

if not formatted_examples:
    # Fail early with an actionable message: with nothing to index, the run
    # would otherwise only fail later, during embedding/index construction,
    # with a less helpful error.
    raise ValueError('No topics found under the "topics" key of data/topics.json')

# The timed region covers embedding-client creation, index construction and
# the similarity lookup below.
start_time = datetime.now()

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=formatted_examples,
    embeddings=GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        # Key is read from the environment (populated by load_dotenv()).
        api_key=os.getenv("GOOGLE_API_KEY"),
    ),
    vectorstore_cls=FAISS,
    k=1,  # ask for a single example per query
    input_keys=["topic"],
)

# Look up the stored topic closest to the query statement.
result = example_selector.select_examples(
    {"topic": "people who are terminally ill and suffering greatly should have the right to end their own life if they so desire."}
)
print(result)

end_time = datetime.now()
print(f"Time taken: {(end_time - start_time).total_seconds()} seconds")