"""Download and load the Qwen2.5-Coder GGUF model via llama-cpp.

Exposes a module-level ``model`` singleton plus sync/async loaders.
"""

import asyncio
import logging
import os
from typing import Optional

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger("nexari.coder")

# Default directory the GGUF file is stored in.
BASE_DIR = "./models/coder"

# Module-level singleton; populated by load_model(), None until then
# (and reset to None if loading fails).
model = None

REPO_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF"
FILENAME = "qwen2.5-coder-1.5b-instruct-q8_0.gguf"


def download_if_needed(local_dir: str) -> str:
    """Ensure the coder GGUF file exists locally and return its path.

    Skips the download when the file is already present in *local_dir*;
    otherwise fetches it from the Hugging Face Hub.

    Args:
        local_dir: Directory to store (or find) the model file in.
            Created if it does not exist.

    Returns:
        Filesystem path to the GGUF model file.
    """
    os.makedirs(local_dir, exist_ok=True)
    local_path = os.path.join(local_dir, FILENAME)
    if os.path.exists(local_path):
        # Lazy %-style args: no string formatting unless INFO is enabled.
        logger.info("Coder model already present: %s", local_path)
        return local_path
    logger.info("Downloading coder model to %s ...", local_dir)
    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
    logger.info("Downloaded coder: %s", path)
    return path


def load_model(local_dir: Optional[str] = None):
    """Download (if needed) and load the coder model into the module global.

    Args:
        local_dir: Directory for the model file; defaults to ``BASE_DIR``
            when None or empty.

    Returns:
        The loaded ``Llama`` instance (also stored in the module-level
        ``model`` global).

    Raises:
        Exception: Re-raises any failure from download or model
            construction after logging it; ``model`` is reset to None
            so callers never see a half-initialized singleton.
    """
    global model
    if not local_dir:
        local_dir = BASE_DIR
    try:
        model_path = download_if_needed(local_dir)
        model = Llama(model_path=model_path, n_ctx=2048, verbose=False)
        logger.info("Coder model loaded")
        return model
    except Exception:
        # Boundary handler: log with traceback, clear the partial state,
        # and propagate so the caller decides how to react.
        logger.exception("Failed to load coder model")
        model = None
        raise


async def load_model_async():
    """Load the coder model off the event loop via a worker thread."""
    return await asyncio.to_thread(load_model)