test
Browse files
app.py
CHANGED
@@ -2,20 +2,28 @@ import os
|
|
2 |
import gradio as gr
|
3 |
#import time
|
4 |
from langchain.llms import CTransformers
|
|
|
|
|
5 |
|
6 |
model_repo = os.getenv('HF_MODEL_REPO')
|
7 |
model_bin = os.getenv('HF_MODEL_BIN')
|
8 |
llm_config = {
|
9 |
-
'max_new_tokens':
|
10 |
'temperature': 0.8,
|
11 |
'top_p': 0.5,
|
12 |
'repetition_penalty': 1.1,
|
13 |
}
|
14 |
|
15 |
-
llm = CTransformers(
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def response(prompt):
    """Return the LLM's completion for *prompt* (module-level `llm`)."""
    return llm(prompt)
|
20 |
|
21 |
if __name__ == '__main__':
|
|
|
2 |
import gradio as gr
|
3 |
#import time
|
4 |
from langchain.llms import CTransformers
|
5 |
+
from langchain.cache import InMemoryCache
|
6 |
+
import langchain
|
7 |
|
8 |
# Model location is supplied through the environment (e.g. Space secrets).
model_repo = os.getenv('HF_MODEL_REPO')
model_bin = os.getenv('HF_MODEL_BIN')

# Sampling / generation settings passed to the ctransformers backend.
llm_config = {
    'max_new_tokens': 2048,
    'temperature': 0.8,
    'top_p': 0.5,
    'repetition_penalty': 1.1,
}

# Load the quantized model file from the repo; "avx2" selects the CPU
# backend variant — NOTE(review): assumes the host CPU supports AVX2,
# confirm for the deployment target.
llm = CTransformers(
    model=model_repo,
    model_file=model_bin,
    config=llm_config,
    lib="avx2",
)

# Memoize identical prompts so repeated requests skip generation.
langchain.llm_cache = InMemoryCache()
|
24 |
|
25 |
def response(prompt):
    """Generate a completion for *prompt* with the module-level `llm`.

    `reset=True` clears the model's previous context before generating;
    `threads=2` caps CPU threads used for inference.
    """
    return llm(prompt, reset=True, threads=2)
|
28 |
|
29 |
if __name__ == '__main__':
|