Update app.py
app.py CHANGED

```diff
@@ -1,29 +1,21 @@
 import os
 import gradio as gr
-
-from langchain.llms import CTransformers
-from langchain.cache import InMemoryCache
-import langchain
+from ctransformers import AutoModelForCausalLM
 
 model_repo = os.getenv('HF_MODEL_REPO')
 model_bin = os.getenv('HF_MODEL_BIN')
-llm_config = {
-    'max_new_tokens': 400,
-    'temperature': 0.8,
-    'top_p': 0.5,
-    'repetition_penalty': 1.1,
-}
 
-llm = CTransformers(
+llm = AutoModelForCausalLM.from_pretrained(
     model=model_repo,
     model_file=model_bin,
-    config=llm_config,
-
+    threads=2,
+    seed=42,
+    context_length=16384,
+    lib="avx2",
 )
-langchain.llm_cache = InMemoryCache()
 
 def response(prompt):
-    txt = llm(prompt, reset=False)
+    txt = llm(prompt, max_new_tokens=8192, temperature=0.8, top_p=0.5, repetition_penalty=1.1, reset=False, stop=["</s>","<|im_end|>"], )
     return txt
 
 if __name__ == '__main__':
```
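The new version drops the LangChain `CTransformers` wrapper and its `InMemoryCache`, loads the quantized binary directly through `ctransformers`, and moves the sampling parameters from the deleted `llm_config` dict into the generation call itself. The hunk ends at `if __name__ == '__main__':`, so the Gradio wiring is not shown. Below is a minimal runnable sketch of the same pattern; the fallback repo/file names and the `gr.Interface` block are illustrative assumptions, not part of the commit. One detail worth noting: in the `ctransformers` releases I am aware of, `AutoModelForCausalLM.from_pretrained` takes the repo id or local path as its first positional argument (`model_path_or_repo_id`) rather than a `model=` keyword, so the sketch passes it positionally.

```python
import os

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Same pattern as the commit: repo and model file come from Space secrets.
# The fallback values here are illustrative assumptions only.
model_repo = os.getenv('HF_MODEL_REPO', 'TheBloke/zephyr-7B-beta-GGUF')
model_bin = os.getenv('HF_MODEL_BIN', 'zephyr-7b-beta.Q4_K_M.gguf')

llm = AutoModelForCausalLM.from_pretrained(
    model_repo,            # first positional arg: repo id or local path
    model_file=model_bin,  # which quantized binary to load from the repo
    threads=2,             # CPU threads used for inference
    seed=42,               # fixed seed for reproducible sampling
    context_length=16384,  # how many tokens of context the model keeps
    lib="avx2",            # select the AVX2 CPU backend
)

def response(prompt):
    # Generation parameters mirror the commit: reset=False keeps the KV cache
    # across calls, and the stop sequences end generation at chat turn markers.
    return llm(
        prompt,
        max_new_tokens=8192,
        temperature=0.8,
        top_p=0.5,
        repetition_penalty=1.1,
        reset=False,
        stop=["</s>", "<|im_end|>"],
    )

if __name__ == '__main__':
    # Hypothetical wiring for the part of app.py the hunk does not show.
    demo = gr.Interface(fn=response, inputs='text', outputs='text')
    demo.launch()
```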