# Nexari-G1.1 / chat_model.py
import os
import logging
import asyncio
from typing import Optional

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logger = logging.getLogger("nexari.chat")

# Local cache directory and the quantized GGUF build to fetch from the Hub.
BASE_DIR = "./models/chat"
REPO_ID = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
FILENAME = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"

# Module-level singleton, populated by load_model().
model = None
def download_if_needed(local_dir: str) -> str:
    """Return the local path to the GGUF file, downloading it on first use."""
    os.makedirs(local_dir, exist_ok=True)
    local_path = os.path.join(local_dir, FILENAME)
    if os.path.exists(local_path):
        logger.info(f"Chat model already present: {local_path}")
        return local_path
    logger.info(f"Downloading chat model to {local_dir} ...")
    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
    logger.info(f"Downloaded chat: {path}")
    return path
def load_model(local_dir: Optional[str] = None):
    """Download the chat model if needed, load it, and cache it in the module global."""
    global model
    if not local_dir:
        local_dir = BASE_DIR
    try:
        model_path = download_if_needed(local_dir)
        # 2048-token context keeps memory use modest; raise n_ctx for longer chats.
        model = Llama(model_path=model_path, n_ctx=2048, verbose=False)
        logger.info("Chat model loaded")
        return model
    except Exception as e:
        logger.exception(f"Failed to load chat model: {e}")
        model = None
        raise
async def load_model_async():
    """Run the blocking load_model() in a worker thread so the event loop stays responsive."""
    return await asyncio.to_thread(load_model)
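
# Usage sketch (illustrative, not part of the original file): load the model and run
# one chat turn. create_chat_completion is llama-cpp-python's chat API; the prompt,
# max_tokens value, and the async demo below are assumptions for demonstration only.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Synchronous path: download (if needed), load, and generate a short reply.
    llm = load_model()
    reply = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=64,
    )
    print(reply["choices"][0]["message"]["content"])

    # Async path: the same load, kept off the event loop via asyncio.to_thread.
    # Here it simply reloads from the already-downloaded file.
    asyncio.run(load_model_async())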