# FastAPI-Backend-Models / services/topic_service.py
# Author: Yassine Mhirsi
# feat: Add topic similarity service using Google Generative AI embeddings,
# enabling improved topic matching and similarity analysis. Update topic
# extraction logic to utilize this service and enhance overall functionality.
# Commit: a453c29
"""Service for topic extraction from text using LangChain Groq"""
import logging
from typing import Optional, List
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from langsmith import traceable
from config import GROQ_API_KEY
logger = logging.getLogger(__name__)
class TopicOutput(BaseModel):
    """Pydantic schema for topic extraction output.

    Bound to the LLM via ``with_structured_output`` (see TopicService), so the
    field description below is part of the schema the model is steered with.
    """
    # NOTE: this description is sent to the model as part of the structured
    # output schema — changing its wording changes model behavior.
    topic: str = Field(..., description="A specific, detailed topic description")
class TopicService:
    """Service for extracting topics from text arguments via a Groq-hosted LLM.

    The underlying ChatGroq client is created lazily on first use (or via an
    explicit ``initialize()`` call) and is bound to the ``TopicOutput`` schema
    so every invocation returns a validated structured result.
    """

    def __init__(self):
        # LLM is built lazily in initialize() so importing this module does
        # not require a valid GROQ_API_KEY.
        self.llm = None
        self.model_name = "openai/gpt-oss-safeguard-120b"  # another model meta-llama/llama-4-scout-17b-16e-instruct
        self.initialized = False

    def initialize(self, model_name: Optional[str] = None):
        """Initialize the Groq LLM with structured output.

        Args:
            model_name: Optional override for the default model name.

        Raises:
            ValueError: If GROQ_API_KEY is missing from the environment.
            RuntimeError: If the ChatGroq client cannot be constructed.
        """
        # Idempotent: repeated calls after a successful init are no-ops.
        if self.initialized:
            logger.info("Topic service already initialized")
            return
        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not found in environment variables")
        if model_name:
            self.model_name = model_name
        try:
            logger.info("Initializing topic extraction service with model: %s", self.model_name)
            llm = ChatGroq(
                model=self.model_name,
                api_key=GROQ_API_KEY,
                temperature=0.0,  # deterministic extraction
                max_tokens=512,
            )
            # Bind structured output directly to the model so invoke()
            # returns a validated TopicOutput instance, not raw text.
            self.llm = llm.with_structured_output(TopicOutput)
            self.initialized = True
            logger.info("✓ Topic extraction service initialized successfully")
        except Exception as e:
            logger.error("Error initializing topic service: %s", e)
            # Chain the original exception so the root cause is preserved.
            raise RuntimeError(f"Failed to initialize topic service: {str(e)}") from e

    @traceable(name="extract_topic")
    def extract_topic(self, text: str) -> str:
        """
        Extract a topic from the given text/argument.

        Args:
            text: The input text/argument to extract topic from.

        Returns:
            The extracted topic string.

        Raises:
            ValueError: If ``text`` is not a non-empty string.
            RuntimeError: If the LLM call fails.
        """
        # Lazy initialization keeps module import cheap and key-free.
        if not self.initialized:
            self.initialize()
        if not text or not isinstance(text, str):
            raise ValueError("Text must be a non-empty string")
        text = text.strip()
        if len(text) == 0:
            raise ValueError("Text cannot be empty")
        # NOTE: this prompt text is part of the runtime behavior — keep stable.
        system_message = """You are an information extraction model.
Extract a topic from the user text. The topic should be a single sentence that captures the main idea of the text in simple english.
Examples:
- Text: "Governments should subsidize electric cars to encourage adoption."
Output: topic="government subsidies for electric vehicle adoption"
- Text: "Raising the minimum wage will hurt small businesses and cost jobs."
Output: topic="raising the minimum wage and its economic impact on small businesses"
"""
        try:
            result = self.llm.invoke(
                [
                    SystemMessage(content=system_message),
                    HumanMessage(content=text),
                ]
            )
            return result.topic
        except Exception as e:
            logger.error("Error extracting topic: %s", e)
            # Chain the original exception so callers can inspect the cause.
            raise RuntimeError(f"Topic extraction failed: {str(e)}") from e

    def batch_extract_topics(self, texts: List[str]) -> List[Optional[str]]:
        """
        Extract topics from multiple texts, best-effort.

        Args:
            texts: List of input texts/arguments.

        Returns:
            List of extracted topics, aligned with ``texts``. Entries for
            which extraction failed are ``None`` (failures are logged, not
            raised, so one bad item does not abort the batch).

        Raises:
            ValueError: If ``texts`` is not a non-empty list.
        """
        if not self.initialized:
            self.initialize()
        if not texts or not isinstance(texts, list):
            raise ValueError("Texts must be a non-empty list")
        results: List[Optional[str]] = []
        for text in texts:
            try:
                results.append(self.extract_topic(text))
            except Exception as e:
                logger.error("Error extracting topic for text '%s...': %s", text[:50], e)
                results.append(None)  # best-effort: keep alignment with input
        return results
# Module-level singleton: import `topic_service` and call its methods.
# The LLM itself is initialized lazily on first extract call (see initialize()).
topic_service = TopicService()