File size: 739 Bytes
eb6fd1c 6ca346a eb6fd1c cff4853 eb6fd1c 6ca346a e255213 151d541 6ca346a 151d541 eb6fd1c 6ca346a 06342ef eb6fd1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Model location comes from the environment (set by the deployment, e.g.
# Space secrets). If either variable is unset, os.getenv returns None and
# from_pretrained below will fail at load time — TODO confirm desired failure mode.
model_repo = os.getenv('HF_MODEL_REPO')
model_bin = os.getenv('HF_MODEL_BIN')
# Load the model once at module import so every request reuses the same
# instance (the UI below serves from this single `llm`).
llm = AutoModelForCausalLM.from_pretrained(
model_repo,
model_file=model_bin,  # specific weights file within the repo
threads=2,  # CPU threads used for inference
seed=42,  # fixed seed — sampling is reproducible across runs
context_length=16384,  # presumably the max tokens of context — verify model supports it
lib="avx2",  # force the AVX2 CPU backend
)
def response(prompt):
    """Return the model's completion for *prompt*.

    Sampling parameters are fixed; ``reset=False`` keeps the model's
    internal state between successive calls.
    """
    return llm(
        prompt,
        max_new_tokens=8192,
        temperature=0.8,
        top_p=0.5,
        repetition_penalty=1.1,
        reset=False,
        stop=["</s>", "<|im_end|>"],
    )
if __name__ == '__main__':
    title = "Chat"
    demo_status = "Demo is running on CPU"
    # Fix: demo_status was previously assigned but never used — pass it as
    # the interface description so the status text actually reaches the UI.
    # (Also removed stray trailing junk after .launch().)
    gr.Interface(
        response,
        inputs="text",
        outputs="text",
        title=title,
        description=demo_status,
    ).launch()