"""Download and load the Qwen2.5-Coder GGUF model via llama-cpp.

Exposes a module-level ``model`` singleton plus sync/async loaders.
"""

import asyncio
import logging
import os
from typing import Optional

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger("nexari.coder")

# Default directory the GGUF file is stored in.
BASE_DIR = "./models/coder"

# Module-level singleton; populated by load_model(), None until then
# (and reset to None if loading fails).
model = None

REPO_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF"
FILENAME = "qwen2.5-coder-1.5b-instruct-q8_0.gguf"


def download_if_needed(local_dir: str) -> str:
    """Ensure the coder GGUF file exists locally and return its path.

    Skips the download when the file is already present in *local_dir*;
    otherwise fetches it from the Hugging Face Hub.

    Args:
        local_dir: Directory to store (or find) the model file in.
            Created if it does not exist.

    Returns:
        Filesystem path to the GGUF model file.
    """
    os.makedirs(local_dir, exist_ok=True)
    local_path = os.path.join(local_dir, FILENAME)
    if os.path.exists(local_path):
        # Lazy %-style args: no string formatting unless INFO is enabled.
        logger.info("Coder model already present: %s", local_path)
        return local_path
    logger.info("Downloading coder model to %s ...", local_dir)
    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
    logger.info("Downloaded coder: %s", path)
    return path


def load_model(local_dir: Optional[str] = None):
    """Download (if needed) and load the coder model into the module global.

    Args:
        local_dir: Directory for the model file; defaults to ``BASE_DIR``
            when None or empty.

    Returns:
        The loaded ``Llama`` instance (also stored in the module-level
        ``model`` global).

    Raises:
        Exception: Re-raises any failure from download or model
            construction after logging it; ``model`` is reset to None
            so callers never see a half-initialized singleton.
    """
    global model
    if not local_dir:
        local_dir = BASE_DIR
    try:
        model_path = download_if_needed(local_dir)
        model = Llama(model_path=model_path, n_ctx=2048, verbose=False)
        logger.info("Coder model loaded")
        return model
    except Exception:
        # Boundary handler: log with traceback, clear the partial state,
        # and propagate so the caller decides how to react.
        logger.exception("Failed to load coder model")
        model = None
        raise


async def load_model_async():
    """Load the coder model off the event loop via a worker thread."""
    return await asyncio.to_thread(load_model)