Yassine Mhirsi
feat: Add topic similarity service using Google Generative AI embeddings, enabling improved topic matching and similarity analysis. Update topic extraction logic to use this service.
a453c29
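The commit message references a topic similarity service built on Google Generative AI embeddings. That service is not part of the file shown below; the following is only a rough sketch of what such a service might look like, assuming langchain_google_genai's GoogleGenerativeAIEmbeddings class and a hypothetical GOOGLE_API_KEY entry in config (neither is confirmed by this commit).

# Hypothetical sketch only; not the commit's actual code.
# Assumes langchain_google_genai is installed and config defines GOOGLE_API_KEY.
import math
from typing import List, Tuple

from langchain_google_genai import GoogleGenerativeAIEmbeddings

from config import GOOGLE_API_KEY  # assumed setting, not shown in this commit


class TopicSimilarityService:
    """Embeds topic strings and scores them with cosine similarity."""

    def __init__(self, model: str = "models/embedding-001"):
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model=model,
            google_api_key=GOOGLE_API_KEY,
        )

    @staticmethod
    def _cosine(a: List[float], b: List[float]) -> float:
        dot = sum(x * y for x, y in zip(a, b))
        norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
        return dot / norm if norm else 0.0

    def similarity(self, topic_a: str, topic_b: str) -> float:
        """Cosine similarity between two topic strings."""
        vec_a, vec_b = self.embeddings.embed_documents([topic_a, topic_b])
        return self._cosine(vec_a, vec_b)

    def most_similar(self, topic: str, candidates: List[str]) -> Tuple[str, float]:
        """Return the (candidate, score) pair closest to the given topic."""
        query = self.embeddings.embed_query(topic)
        candidate_vectors = self.embeddings.embed_documents(candidates)
        scored = [(c, self._cosine(query, v)) for c, v in zip(candidates, candidate_vectors)]
        return max(scored, key=lambda item: item[1])

The topic extraction service updated by this commit follows.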
| """Service for topic extraction from text using LangChain Groq""" | |
| import logging | |
| from typing import Optional, List | |
| from langchain_core.messages import HumanMessage, SystemMessage | |
| from langchain_groq import ChatGroq | |
| from pydantic import BaseModel, Field | |
| from langsmith import traceable | |
| from config import GROQ_API_KEY | |
| logger = logging.getLogger(__name__) | |


class TopicOutput(BaseModel):
    """Pydantic schema for topic extraction output"""
    topic: str = Field(..., description="A specific, detailed topic description")


class TopicService:
    """Service for extracting topics from text arguments"""

    def __init__(self):
        self.llm = None
        # Alternative model: meta-llama/llama-4-scout-17b-16e-instruct
        self.model_name = "openai/gpt-oss-safeguard-120b"
        self.initialized = False

    def initialize(self, model_name: Optional[str] = None):
        """Initialize the Groq LLM with structured output"""
        if self.initialized:
            logger.info("Topic service already initialized")
            return

        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not found in environment variables")

        if model_name:
            self.model_name = model_name

        try:
            logger.info(f"Initializing topic extraction service with model: {self.model_name}")
            llm = ChatGroq(
                model=self.model_name,
                api_key=GROQ_API_KEY,
                temperature=0.0,
                max_tokens=512,
            )
            # Bind structured output directly to the model
            self.llm = llm.with_structured_output(TopicOutput)
            self.initialized = True
            logger.info("✓ Topic extraction service initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing topic service: {str(e)}")
            raise RuntimeError(f"Failed to initialize topic service: {str(e)}") from e

    def extract_topic(self, text: str) -> str:
        """
        Extract a topic from the given text/argument

        Args:
            text: The input text/argument to extract the topic from

        Returns:
            The extracted topic string
        """
        if not self.initialized:
            self.initialize()

        if not text or not isinstance(text, str):
            raise ValueError("Text must be a non-empty string")

        text = text.strip()
        if len(text) == 0:
            raise ValueError("Text cannot be empty")

        system_message = """You are an information extraction model.
Extract a topic from the user text. The topic should be a single sentence that captures the main idea of the text in simple English.

Examples:
- Text: "Governments should subsidize electric cars to encourage adoption."
  Output: topic="government subsidies for electric vehicle adoption"
- Text: "Raising the minimum wage will hurt small businesses and cost jobs."
  Output: topic="raising the minimum wage and its economic impact on small businesses"
"""

        try:
            result = self.llm.invoke(
                [
                    SystemMessage(content=system_message),
                    HumanMessage(content=text),
                ]
            )
            return result.topic
        except Exception as e:
            logger.error(f"Error extracting topic: {str(e)}")
            raise RuntimeError(f"Topic extraction failed: {str(e)}") from e

    def batch_extract_topics(self, texts: List[str]) -> List[Optional[str]]:
        """
        Extract topics from multiple texts

        Args:
            texts: List of input texts/arguments

        Returns:
            List of extracted topics; entries are None for texts whose extraction failed
        """
        if not self.initialized:
            self.initialize()

        if not texts or not isinstance(texts, list):
            raise ValueError("Texts must be a non-empty list")

        results = []
        for text in texts:
            try:
                topic = self.extract_topic(text)
                results.append(topic)
            except Exception as e:
                logger.error(f"Error extracting topic for text '{text[:50]}...': {str(e)}")
                results.append(None)  # Record the failure instead of aborting the whole batch
        return results


# Module-level singleton; the LLM is initialized lazily on first use
topic_service = TopicService()
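
A minimal usage sketch for the singleton above, assuming this module is importable as services.topic_service (the import path is an assumption) and GROQ_API_KEY is set in the environment:

# Hypothetical usage; the import path "services.topic_service" is an assumption.
from services.topic_service import topic_service

single = topic_service.extract_topic(
    "Raising the minimum wage will hurt small businesses and cost jobs."
)
print(single)  # e.g. "raising the minimum wage and its economic impact on small businesses"

batch = topic_service.batch_extract_topics([
    "Governments should subsidize electric cars to encourage adoption.",
    "College education should be free for all citizens.",
])
print(batch)  # entries are None for any text whose extraction failed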