import gradio as gr
from llama_cpp import Llama

# llama_cpp cannot load a model from a URL; model_path must point to a local file.
# Llama.from_pretrained() downloads the GGUF from the Hugging Face Hub
# (requires the huggingface_hub package) and passes the remaining keyword
# arguments on to the Llama constructor.
llm = Llama.from_pretrained(
    repo_id="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
    filename="OpenAI-20B-NEO-Uncensored2-IQ4_NL.gguf",
    n_ctx=8192,        # context window size
    n_gpu_layers=0,    # CPU only; increase to offload layers to a GPU
    verbose=False,
)

def generate_response(prompt):
    # Run a plain completion and return only the generated text.
    output = llm(prompt, max_tokens=512, temperature=1.0, top_p=0.95)
    return output["choices"][0]["text"]

iface = gr.Interface(fn=generate_response, inputs="text", outputs="text", title="GPT-20B Test")
iface.launch()