import asyncio
import logging
import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger("nexari.chat")

BASE_DIR = "./models/chat"
REPO_ID = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
FILENAME = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"

# Module-level handle to the loaded Llama instance (None until load_model succeeds).
model = None


def download_if_needed(local_dir: str) -> str:
    """Download the GGUF chat model into local_dir unless it is already present."""
    os.makedirs(local_dir, exist_ok=True)
    local_path = os.path.join(local_dir, FILENAME)
    if os.path.exists(local_path):
        logger.info(f"Chat model already present: {local_path}")
        return local_path
    logger.info(f"Downloading chat model to {local_dir} ...")
    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
    logger.info(f"Downloaded chat: {path}")
    return path


def load_model(local_dir: str | None = None):
    """Load the chat model into the module-level `model`, downloading it first if needed."""
    global model
    if not local_dir:
        local_dir = BASE_DIR
    try:
        model_path = download_if_needed(local_dir)
        model = Llama(model_path=model_path, n_ctx=2048, verbose=False)
        logger.info("Chat model loaded")
        return model
    except Exception as e:
        logger.exception(f"Failed to load chat model: {e}")
        model = None
        raise


async def load_model_async():
    """Run the blocking load in a worker thread so an asyncio event loop is not stalled."""
    return await asyncio.to_thread(load_model)
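A minimal usage sketch of the module above: await load_model_async once at startup so the download and the Llama constructor run off the event loop, then call llama-cpp-python's create_chat_completion on the returned instance. The caller code below is illustrative and not part of the module; the message contents and max_tokens value are placeholder assumptions.

import asyncio

async def main():
    # First run downloads the GGUF file into ./models/chat; later runs reuse it.
    llm = await load_model_async()
    # create_chat_completion returns an OpenAI-style response dict.
    result = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello in one sentence."},
        ],
        max_tokens=64,
    )
    print(result["choices"][0]["message"]["content"])

asyncio.run(main())

Using asyncio.to_thread for the load keeps the startup hook of an async server responsive while the multi-gigabyte model file is fetched and mapped into memory.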