File size: 739 Bytes
eb6fd1c
 
6ca346a
eb6fd1c
cff4853
 
eb6fd1c
6ca346a
e255213
151d541
6ca346a
 
 
 
151d541
eb6fd1c
 
6ca346a
06342ef
eb6fd1c
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Model configuration comes from the environment so the same script can serve
# different checkpoints without code changes.
model_repo = os.getenv('HF_MODEL_REPO')
model_bin = os.getenv('HF_MODEL_BIN')

# Fail fast with a clear message instead of letting ctransformers raise a
# confusing download/load error when the variables are missing.
if not model_repo or not model_bin:
    raise RuntimeError(
        "Both HF_MODEL_REPO and HF_MODEL_BIN environment variables must be set"
    )

llm = AutoModelForCausalLM.from_pretrained(
    model_repo,
    model_file=model_bin,
    threads=2,                # CPU-only demo: keep the thread count modest
    seed=42,                  # fixed seed for reproducible sampling
    context_length=16384,
    lib="avx2",               # assumes the host CPU supports AVX2 — TODO confirm
)

def response(prompt):
    """Return the model's completion for *prompt*.

    NOTE: ``reset=False`` keeps the model's internal state between calls,
    so successive prompts share conversational context.
    """
    return llm(
        prompt,
        max_new_tokens=8192,
        temperature=0.8,
        top_p=0.5,
        repetition_penalty=1.1,
        reset=False,
        stop=["</s>", "<|im_end|>"],
    )

if __name__ == '__main__':

    title = "Chat"

    # Previously assigned but never used; now surfaced to the user via the
    # interface's description field.
    demo_status = "Demo is running on CPU"

    # Simple text-in/text-out UI; launch() blocks and serves the app.
    gr.Interface(
        response,
        inputs="text",
        outputs="text",
        title=title,
        description=demo_status,
    ).launch()