# Spaces: MCP-1st-Birthday / AIQuoteClipGenerator (status: Running)
# File size: 12,835 Bytes

"""
Gemini-powered quote generator with variety tracking
Eliminates repetitive quotes by maintaining history
"""
import google.generativeai as genai
import os
import json
import time
from typing import Optional


class QuoteGenerator:
    """
    Gemini-powered quote generator with built-in variety tracking.

    A per-niche history of generated quotes is kept in a JSON file. The most
    recent entries of the requested niche are listed in the prompt so the
    model is told not to repeat or paraphrase them.
    """

    # Hard cap on the number of words in a returned quote.
    MAX_WORDS = 22
    # Number of most recent quotes listed in the prompt as "do not repeat".
    RECENT_WINDOW = 30

    def __init__(self, api_key: Optional[str] = None, history_file: str = "/data/quote_history.json"):
        """
        Initialize with Gemini API.

        Args:
            api_key: Gemini API key (defaults to GEMINI_API_KEY env var)
            history_file: Path to persistent quote history storage

        Raises:
            ValueError: If no API key is passed and GEMINI_API_KEY is unset.
        """
        api_key = api_key or os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables")

        genai.configure(api_key=api_key)
        # You can switch to "gemini-1.5-pro" if you want higher quality
        self.model = genai.GenerativeModel("gemini-1.5-flash")

        # Persistent storage for quote history - PER NICHE
        self.history_file = history_file
        self.quotes_by_niche = self._load_history()  # dict: niche -> list[str]
        self.max_history = 100  # Keep last 100 quotes PER NICHE

    def _load_history(self) -> dict:
        """
        Load quote history from persistent storage, organized by niche.

        Understands the current layout ``{"quotes_by_niche": {niche: [...]}}``
        and the legacy layout ``{"quotes": [...]}``, which is migrated into a
        single "General" niche.

        Returns:
            Mapping of niche name to list of quotes. Empty when the file is
            missing, unreadable, or not in a recognised layout.
        """
        try:
            if not os.path.exists(self.history_file):
                return {}
            with open(self.history_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            # Best effort: a broken history file must not prevent start-up.
            print(f"Could not load history: {e}")
            return {}

        if not isinstance(data, dict):
            print("Could not load history: unexpected file format")
            return {}

        stored = data.get("quotes_by_niche")
        if isinstance(stored, dict):
            # Drop malformed buckets so later list operations cannot fail.
            history = {
                niche: list(quotes)
                for niche, quotes in stored.items()
                if isinstance(quotes, list)
            }
            print(f"📖 Loaded history for {len(history)} niches")
            return history

        legacy = data.get("quotes")
        if isinstance(legacy, list):
            # Old format - migrate to a default niche
            print("📖 Migrating to per-niche tracking (General)")
            return {"General": list(legacy)}

        return {}

    def _save_history(self):
        """
        Save quote history to persistent storage.

        Only the last ``max_history`` quotes of each niche are written.
        Failures are reported on stdout and otherwise ignored.
        """
        try:
            # A bare filename has an empty dirname, and os.makedirs("") raises.
            parent = os.path.dirname(self.history_file)
            if parent:
                os.makedirs(parent, exist_ok=True)

            trimmed_data = {
                niche: quotes[-self.max_history:]
                for niche, quotes in self.quotes_by_niche.items()
            }

            with open(self.history_file, "w", encoding="utf-8") as f:
                json.dump({"quotes_by_niche": trimmed_data}, f, indent=2)
        except Exception as e:
            print(f"Could not save history: {e}")

    @classmethod
    def _clean_quote(cls, text) -> str:
        """Strip whitespace and wrapping quote marks, then cap at MAX_WORDS words."""
        cleaned = (text or "").strip()
        cleaned = cleaned.strip('"').strip("'").strip()

        # If for some reason it's still long, hard-trim by words
        words = cleaned.split()
        if len(words) > cls.MAX_WORDS:
            cleaned = " ".join(words[:cls.MAX_WORDS])
        return cleaned

    def _build_prompt(self, niche: str, style: str, history: list) -> str:
        """Build the generation prompt, listing the most recent quotes to avoid."""
        recent_quotes_text = ""
        if history:
            numbered = "".join(
                f"{i}. {quote}\n"
                for i, quote in enumerate(history[-self.RECENT_WINDOW:], 1)
            )
            recent_quotes_text = (
                f"\n\nPREVIOUSLY GENERATED {niche.upper()} QUOTES "
                f"(DO NOT REPEAT OR PARAPHRASE ANY OF THESE):\n"
            ) + numbered

        return f"""
Generate a COMPLETELY UNIQUE and SHORT {niche} quote suitable for an Instagram/TikTok video overlay.

Visual style / mood context: {style}

STRICT FORMAT:
- 1–2 short sentences ONLY (NO paragraphs)
- Maximum {self.MAX_WORDS} words total
- The quote must be self-contained and readable in one quick glance on screen

STYLE REQUIREMENTS:
- Sharp, specific, and emotionally or philosophically strong
- NO clichés (avoid things like "believe in yourself", "follow your dreams", "the only limit", "trust the process")
- NO generic motivational filler, no long poetic rambling
- Avoid starting with bland openers like "Sometimes", "In life", "When you", "If you", "Success is"

VARIETY REQUIREMENTS:
- Must be RADICALLY DIFFERENT from all previously generated quotes below
- Do NOT reuse the same metaphors, structure, or idea as earlier quotes
- New angle, new imagery, new insight

{recent_quotes_text}

Return ONLY the quote text, nothing else:
- No quotation marks
- No author name
- No emojis
- No extra commentary
"""

    def _request_quote(self, prompt: str, temperature: float, top_p: float, top_k: int) -> str:
        """Send one generation request and return the cleaned quote text."""
        response = self.model.generate_content(
            prompt,
            generation_config={
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "max_output_tokens": 60,  # keeps it short
            },
        )
        return self._clean_quote(response.text)

    def generate_quote(self, niche: str, style: str) -> str:
        """
        Generate a unique, SHORT quote using Gemini with variety enforcement.

        If the first answer is empty or already present in this niche's
        history, one retry is made with a higher temperature. A quote that is
        still a duplicate after the retry is returned but not stored again.

        Args:
            niche: Quote category (Motivation, Business, etc.)
            style: Visual style (Cinematic, Nature, etc.)

        Returns:
            A non-empty quote string of at most MAX_WORDS words.

        Raises:
            Exception: If the request fails or no usable text is returned;
                the underlying error is chained as ``__cause__``.
        """
        # Ensure niche bucket exists
        history = self.quotes_by_niche.setdefault(niche, [])

        if history:
            print(
                f"📊 {niche} history: {len(history)} total quotes, "
                f"showing last {min(self.RECENT_WINDOW, len(history))} to avoid"
            )
        else:
            print(f"📊 No {niche} history yet - generating first quote for this niche")

        # Build prompt with strong variety + length control
        prompt = self._build_prompt(niche, style, history)

        try:
            # Moderate temperature first to keep the output tight
            quote = self._request_quote(prompt, temperature=0.7, top_p=0.9, top_k=40)

            if not quote or quote in history:
                reason = "duplicate" if quote else "empty"
                print(f"⚠️ WARNING: Generated {reason} {niche} quote! Retrying once with higher temperature...")
                quote = self._request_quote(prompt, temperature=0.9, top_p=0.95, top_k=60)

            if not quote:
                raise ValueError("Gemini returned an empty quote")

            if quote in history:
                # Storing it again would only waste a slot in the avoid-list.
                print(f"⚠️ WARNING: Retry still produced a duplicate {niche} quote; history left unchanged")
            else:
                # Store in this niche's history and persist
                history.append(quote)
                self._save_history()

            print(f"✅ Generated {niche} quote #{len(history)}")
            print(f"💾 History file: {self.history_file}")

            return quote

        except Exception as e:
            raise Exception(f"Gemini quote generation failed: {str(e)}") from e

    def get_stats(self) -> dict:
        """
        Get statistics about quote generation, per niche.

        Returns:
            Dict with the total number of stored quotes, a niche -> count
            mapping, and the number of niches tracked.
        """
        per_niche = {niche: len(quotes) for niche, quotes in self.quotes_by_niche.items()}
        return {
            "total_quotes_generated": sum(per_niche.values()),
            "quotes_by_niche": per_niche,
            "niches_tracked": len(per_niche),
        }

    def clear_history(self, niche: Optional[str] = None):
        """
        Clear quote history (use with caution).

        Args:
            niche: If provided, clear only this niche. Otherwise clear all.
        """
        # Compare against None so an empty-string niche cannot wipe everything.
        if niche is not None:
            if niche in self.quotes_by_niche:
                self.quotes_by_niche[niche] = []
        else:
            self.quotes_by_niche = {}
        self._save_history()


# Hybrid generator with fallback
class HybridQuoteGenerator:
    """
    Hybrid system using Gemini as primary, OpenAI as fallback.

    Each available backend is tried at most once per request. The order is
    Gemini then OpenAI by default and is reversed when ``prefer_gemini`` is
    False, so the other backend still serves as a fallback.
    """

    # Hard cap on the number of words in a quote returned by the OpenAI path.
    MAX_WORDS = 22

    def __init__(self, gemini_key: Optional[str] = None, openai_client=None):
        """
        Initialize hybrid generator.

        A failure to set up Gemini is reported on stdout and leaves the
        instance usable with OpenAI only.

        Args:
            gemini_key: Gemini API key
            openai_client: OpenAI client instance (for fallback)
        """
        self.openai_client = openai_client
        # Always defined so the attribute exists even when Gemini setup fails.
        self.gemini_generator = None

        try:
            self.gemini_generator = QuoteGenerator(api_key=gemini_key)
            self.gemini_available = True
            print("✅ Gemini generator initialized")
        except Exception as e:
            self.gemini_available = False
            print(f"⚠️ Gemini not available: {e}")

    def _result_from_gemini(self, niche: str, style: str) -> dict:
        """Generate with Gemini and wrap the quote in a success result."""
        quote = self.gemini_generator.generate_quote(niche, style)
        stats = self.gemini_generator.get_stats()
        return {
            "quote": quote,
            "source": "gemini",
            "stats": stats,
            "success": True,
        }

    def _result_from_openai(self, niche: str, style: str) -> dict:
        """Generate with OpenAI and wrap the quote in a success result."""
        quote = self._generate_openai(niche, style)
        return {
            "quote": quote,
            "source": "openai",
            "stats": None,
            "success": True,
        }

    def generate_quote(self, niche: str, style: str, prefer_gemini: bool = True) -> dict:
        """
        Generate quote with automatic fallback.

        Args:
            niche: Quote category
            style: Visual style
            prefer_gemini: Try Gemini first if True, OpenAI first if False

        Returns:
            On success a dict with ``quote``, ``source`` ("gemini" or
            "openai"), ``stats`` and ``success=True``. On failure a dict with
            ``quote=None``, ``source=None``, ``error`` and ``success=False``;
            ``error`` lists the message of every backend that was attempted.
        """
        backends = []
        if self.gemini_available:
            backends.append(("Gemini", self._result_from_gemini))
        if self.openai_client:
            backends.append(("OpenAI", self._result_from_openai))
        if not prefer_gemini:
            backends.reverse()

        if not backends:
            return {
                "quote": None,
                "source": None,
                "error": "No generator available",
                "success": False,
            }

        failures = []
        for position, (label, produce) in enumerate(backends):
            try:
                return produce(niche, style)
            except Exception as e:
                failures.append(f"{label.lower()}: {e}")
                if position + 1 < len(backends):
                    next_label = backends[position + 1][0]
                    print(f"⚠️ {label} failed, falling back to {next_label}: {e}")
                else:
                    print(f"⚠️ {label} failed: {e}")

        # Say "both" only when two backends were really attempted.
        summary = "Both generators failed" if len(failures) > 1 else "Quote generation failed"
        return {
            "quote": None,
            "source": None,
            "error": f"{summary}: {'; '.join(failures)}",
            "success": False,
        }

    def _generate_openai(self, niche: str, style: str) -> str:
        """
        OpenAI fallback generator (short quote version).

        Returns:
            A non-empty quote of at most MAX_WORDS words.

        Raises:
            ValueError: If the completion carries no usable text.
        """
        prompt = f"""
Generate a UNIQUE, SHORT {niche} quote for an Instagram/TikTok video overlay.

Style / mood: {style}

Requirements:
- 1–2 short sentences ONLY
- Maximum {self.MAX_WORDS} words
- Inspirational or insightful, but NOT generic
- Avoid clichés like "believe in yourself", "follow your dreams", "the only limit", "trust the process"
- No paragraphs, no long explanations

Return ONLY the quote text, nothing else (no quotes, no author, no emojis).
"""

        # Time-derived number placed in the system message to vary the request.
        seed = int(time.time() * 1000) % 1000

        response = self.openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": f"You are a concise, original quote generator. Seed: {seed}",
                },
                {"role": "user", "content": prompt},
            ],
            max_tokens=60,
            temperature=0.7,
        )

        # message.content may be None (e.g. refusals); fail with a clear error.
        content = response.choices[0].message.content
        if not content:
            raise ValueError("OpenAI returned an empty response")

        quote = content.strip()
        quote = quote.strip('"').strip("'").strip()

        # Hard trim in case model gets wordy
        words = quote.split()
        if len(words) > self.MAX_WORDS:
            quote = " ".join(words[:self.MAX_WORDS])

        if not quote:
            raise ValueError("OpenAI returned an empty quote")

        return quote


# Integration function for smolagents tool
def create_hybrid_generator(openai_client):
    """
    Build the HybridQuoteGenerator used by the app.

    The Gemini key is read from the GEMINI_API_KEY environment variable and
    the given OpenAI client is passed through as the fallback backend.
    Intended to be called once at startup.
    """
    gemini_key = os.environ.get("GEMINI_API_KEY")
    hybrid = HybridQuoteGenerator(gemini_key=gemini_key, openai_client=openai_client)
    return hybrid


# Example usage and testing
if __name__ == "__main__":
    # Manual smoke test: generate a few quotes for one niche and print stats.
    print("Testing Gemini Quote Generator with Variety Tracking (short quotes)\n")
    print("=" * 60)

    try:
        generator = QuoteGenerator()

        print("\nGenerating 5 short quotes about Motivation/Cinematic:\n")
        for number in range(1, 6):
            sample = generator.generate_quote("Motivation", "Cinematic")
            print(f"{number}. {sample}\n")

        # Summarise what is now stored in the history.
        stats = generator.get_stats()
        print("\nStats:")
        print(f"  Total generated: {stats['total_quotes_generated']}")
        print(f"  Niches tracked: {stats['niches_tracked']}")
        print(f"  Per niche: {stats['quotes_by_niche']}")
    except Exception as err:
        # A missing API key is the most likely cause, hence the hint below.
        print(f"Error: {err}")
        print("\nMake sure GEMINI_API_KEY is set in environment variables")