# FastAPI-Backend-Models/topic_similarity_langchain_example.py
import json
import os
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import (
SemanticSimilarityExampleSelector,
)
from langchain_google_genai import GoogleGenerativeAIEmbeddings
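# Assumed dependencies (not pinned in the original):
#   pip install python-dotenv langchain-community langchain-google-genai faiss-cpu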
# Load topics from data file
with open(
    file="data/topics.json",
    encoding="utf-8"
) as f:
    data = json.load(f)
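# Assumed shape of data/topics.json (illustrative; entries may be plain strings or dicts):
#   {"topics": ["euthanasia", {"topic": "climate change"}, ...]}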
# Make sure each example is a dict with "topic" key (wrap as dict if plain string)
def format_examples(examples):
    formatted = []
    for ex in examples:
        if isinstance(ex, str):
            formatted.append({"topic": ex})
        elif isinstance(ex, dict) and "topic" in ex:
            formatted.append({"topic": ex["topic"]})
        else:
            formatted.append({"topic": str(ex)})
    return formatted
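# e.g. format_examples(["euthanasia", {"topic": "climate change"}])
#      -> [{"topic": "euthanasia"}, {"topic": "climate change"}]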
# topics.json should have a top-level "topics" key
examples = data.get("topics", [])
formatted_examples = format_examples(examples)
start_time = datetime.now()
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples=formatted_examples,
    embeddings=GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        api_key=os.getenv("GOOGLE_API_KEY"),
    ),
    vectorstore_cls=FAISS,
    k=1,
    input_keys=["topic"],
)
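# from_examples() embeds every formatted topic and builds an in-memory FAISS
# index; with k=1 the selector returns only the single closest stored topic.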
# Example call to selector (for demonstration; remove in production)
result = example_selector.select_examples(
    {"topic": "people who are terminally ill and suffering greatly should have the right to end their own life if they so desire."}
)
print(result)
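# Expected output shape (the actual topic depends on the contents of topics.json):
#   [{"topic": "<stored topic most similar to the query>"}]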
end_time = datetime.now()
print(f"Time taken: {(end_time - start_time).total_seconds()} seconds")