"""AI Document Assistant.

A Gradio chat application: upload documents (PDF, DOCX, TXT, CSV) and
chat about their contents with a Hugging Face Inference API model.
Requires the ``HF_TOKEN`` environment variable (Space repository secret).
"""

import json
import os
from datetime import datetime
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient

# Optional extraction libraries — each probed independently so the app
# still runs (with reduced functionality) when one is missing.
try:
    from pypdf import PdfReader
    PYPDF_AVAILABLE = True
except ImportError:
    PYPDF_AVAILABLE = False

try:
    import docx
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False

# Model configurations (Inference API model ids offered in the dropdown)
MODELS = [
    "Qwen/Qwen2.5-72B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "google/gemma-2-9b-it",
    "microsoft/Phi-3-mini-4k-instruct",
]

SYSTEM_PROMPTS = {
    "Default": "You are a helpful, respectful and honest assistant.",
    "Document Analyzer": "You are an expert at analyzing documents. Provide detailed insights, summaries, and answer questions based on the provided document content.",
    "Code Expert": "You are an expert programmer. Analyze code, provide explanations, and suggest improvements.",
    "Data Scientist": "You are a data science expert. Analyze data files and provide insights with statistical analysis.",
    "Research Assistant": "You are a research assistant. Help analyze academic papers and documents, extract key findings.",
}


def extract_text_from_pdf(file_path):
    """Extract all page text from a PDF file.

    Args:
        file_path: Path to the PDF on disk.

    Returns:
        Extracted text with per-page headers, or an error/unavailable
        message string (this function never raises).
    """
    if not PYPDF_AVAILABLE:
        return "āŒ PDF extraction unavailable."
    try:
        reader = PdfReader(file_path)
        text = f"šŸ“„ PDF: {len(reader.pages)} pages\n\n"
        for page_num, page in enumerate(reader.pages, 1):
            page_text = page.extract_text()
            text += f"--- Page {page_num} ---\n{page_text}\n\n"
        return text
    except Exception as e:
        return f"āŒ Error reading PDF: {str(e)}"


def extract_text_from_docx(file_path):
    """Extract paragraph text from a DOCX file.

    Returns the non-empty paragraphs joined by blank lines, or an
    error/unavailable message string (never raises).
    """
    if not DOCX_AVAILABLE:
        return "āŒ DOCX extraction unavailable."
    try:
        doc = docx.Document(file_path)
        return "\n\n".join([p.text for p in doc.paragraphs if p.text.strip()])
    except Exception as e:
        return f"āŒ Error reading DOCX: {str(e)}"


def extract_text_from_txt(file_path):
    """Read a plain-text file, falling back from UTF-8 to Latin-1.

    Returns the file contents, or an error message string (never raises).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        # Latin-1 maps every byte, so this fallback cannot re-raise a
        # decode error on arbitrary binary-ish text.
        with open(file_path, 'r', encoding='latin-1') as f:
            return f.read()
    except Exception as e:
        return f"āŒ Error: {str(e)}"


def extract_text_from_csv(file_path):
    """Summarize a CSV file: shape, columns, preview, and statistics.

    Returns a human-readable summary, or an error/unavailable message
    string (never raises).
    """
    if not PANDAS_AVAILABLE:
        return "āŒ CSV extraction unavailable."
    try:
        df = pd.read_csv(file_path)
        text = f"šŸ“Š CSV: {len(df)} rows, {len(df.columns)} columns\n\n"
        text += f"Columns: {', '.join(df.columns)}\n\n"
        text += f"Preview (first 10 rows):\n{df.head(10).to_string()}\n\n"
        text += f"Statistics:\n{df.describe().to_string()}"
        return text
    except Exception as e:
        return f"āŒ Error: {str(e)}"


def process_files(files):
    """Extract text from each uploaded file, dispatching on extension.

    Args:
        files: List of file paths (as provided by the multimodal textbox).

    Returns:
        A single formatted string with all documents' contents, or ""
        when no files were given.
    """
    if not files:
        return ""
    content = "\n\n" + "="*50 + "\nšŸ“Ž UPLOADED DOCUMENTS\n" + "="*50 + "\n\n"
    for file_path in files:
        file_name = Path(file_path).name
        file_ext = Path(file_path).suffix.lower()
        content += f"\nšŸ“„ **{file_name}**\n\n"
        if file_ext == '.pdf':
            text = extract_text_from_pdf(file_path)
        elif file_ext in ['.docx', '.doc']:
            text = extract_text_from_docx(file_path)
        elif file_ext in ['.txt', '.md', '.py', '.json']:
            text = extract_text_from_txt(file_path)
        elif file_ext == '.csv':
            text = extract_text_from_csv(file_path)
        else:
            text = f"āš ļø Unsupported format: {file_ext}"
        content += text + "\n\n" + "-"*50 + "\n"
    return content


def convert_history_to_messages(history):
    """Convert tuple-format chat history to OpenAI-style message dicts.

    Args:
        history: List of ``[user_msg, assistant_msg]`` pairs; either side
            may be falsy and is then skipped.

    Returns:
        List of ``{"role": ..., "content": ...}`` dicts in turn order.
    """
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def respond(message, history, system_message, max_tokens,
            temperature, top_p, model_id):
    """Main chat function (tuple-format history). Streams the reply.

    Generator: yields ``(history, None)`` pairs so Gradio updates the
    chatbot incrementally and clears the input box.
    """
    # Parse the (possibly multimodal) input up front so ``user_text`` is
    # always bound, including on the error paths below.
    if isinstance(message, dict):
        user_text = message.get("text", "")
        files = message.get("files", [])
    else:
        user_text = str(message) if message else ""
        files = []

    token = os.getenv("HF_TOKEN")
    if not token:
        # BUG FIX: this function is a generator, so the original
        # ``return history, None`` was swallowed by StopIteration and the
        # warning never reached the UI — it must be yielded.
        history.append([user_text, "āš ļø HF_TOKEN not configured. Please set it in Space settings → Repository secrets."])
        yield history, None
        return

    if not user_text.strip() and not files:
        # Nothing to do; still yield so the outputs are refreshed.
        yield history, None
        return

    # Append the user's turn (empty assistant slot) BEFORE the try block,
    # so the except handler below can safely overwrite history[-1] —
    # the original could clobber the previous turn or IndexError when the
    # client raised before the append.
    history.append([user_text, ""])

    try:
        client = InferenceClient(token=token, model=model_id)

        # Build the message list from every *previous* turn (the current,
        # still-unanswered turn is history[-1] and is added separately
        # with the extracted file contents attached).
        messages = [{"role": "system", "content": system_message}]
        messages.extend(convert_history_to_messages(history[:-1]))

        file_content = process_files(files) if files else ""
        messages.append({"role": "user", "content": user_text + file_content})

        # Stream the completion, updating the last turn token by token.
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                history[-1] = [user_text, response]
                yield history, None

        # BUG FIX: guarantee at least one yield even when the stream
        # produced no content chunks, so the UI shows the user's turn.
        yield history, None

    except Exception as e:
        error_msg = f"āŒ Error: {str(e)}\n\nTry a different model or check token permissions."
        history[-1] = [user_text, error_msg]
        yield history, None


def save_conversation(history):
    """Save the chat history to a timestamped JSON file.

    Returns a status message string (never raises).
    """
    if not history:
        return "āš ļø No conversation to save"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"chat_{timestamp}.json"
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(history, f, indent=2, ensure_ascii=False)
        # BUG FIX: the original success message did not interpolate the
        # file name (it was the literal text "(unknown)").
        return f"āœ… Saved to {filename}"
    except Exception as e:
        return f"āŒ Error: {str(e)}"


def update_system_prompt(preset):
    """Return the system prompt for *preset*, defaulting to "Default"."""
    return SYSTEM_PROMPTS.get(preset, SYSTEM_PROMPTS["Default"])


def clear_chat():
    """Reset chatbot history, input box, and status field."""
    return [], None, ""


# Custom theme
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
)

# Build interface
# BUG FIX: custom_theme was constructed but never passed to gr.Blocks.
with gr.Blocks(title="AI Document Assistant", theme=custom_theme) as demo:
    gr.Markdown(
        """
        # šŸ¤– AI Document Assistant
        Upload documents (PDF, DOCX, TXT, CSV) and chat with AI
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            # Uses the default tuple history format (no 'type' parameter).
            chatbot = gr.Chatbot(
                height=550,
                show_label=False,
                avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"),
            )
            msg_input = gr.MultimodalTextbox(
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".csv", ".md", ".py", ".json"],
                placeholder="šŸ’¬ Ask a question or upload documents (PDF, DOCX, TXT, CSV)...",
                show_label=False,
            )
            with gr.Row():
                clear_btn = gr.Button("šŸ—‘ļø Clear Chat", variant="stop")

        with gr.Column(scale=1):
            gr.Markdown("### āš™ļø Settings")
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value=MODELS[0],
                label="šŸ¤– Model",
            )
            preset_dropdown = gr.Dropdown(
                choices=list(SYSTEM_PROMPTS.keys()),
                value="Document Analyzer",
                label="šŸ“‹ Preset",
            )
            system_prompt = gr.Textbox(
                value=SYSTEM_PROMPTS["Document Analyzer"],
                label="šŸ’¬ System Prompt",
                lines=3,
            )
            gr.Markdown("### šŸŽ›ļø Parameters")
            max_tokens = gr.Slider(128, 4096, 2048, step=128, label="Max Tokens")
            temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, 0.95, step=0.05, label="Top-p")
            gr.Markdown("### šŸ’¾ Save")
            save_btn = gr.Button("šŸ’¾ Save Chat")
            status = gr.Textbox(show_label=False, interactive=False, placeholder="Status...")

    gr.Markdown(
        """
        **šŸ’” How to use:**
        1. Upload PDF, DOCX, TXT, or CSV files
        2. Ask questions about the document content
        3. Adjust temperature for creativity (lower = focused, higher = creative)

        **šŸ“¦ Supported libraries:** pypdf, python-docx, pandas
        """
    )

    # Events
    preset_dropdown.change(update_system_prompt, [preset_dropdown], [system_prompt])
    msg_input.submit(
        respond,
        [msg_input, chatbot, system_prompt, max_tokens, temperature, top_p, model_dropdown],
        [chatbot, msg_input],
    )
    clear_btn.click(clear_chat, None, [chatbot, msg_input, status])
    save_btn.click(save_conversation, [chatbot], [status])


if __name__ == "__main__":
    demo.queue()
    demo.launch()