futranbg committed on
Commit
151d541
·
1 Parent(s): 88417e5
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -2,20 +2,28 @@ import os
2
  import gradio as gr
3
  #import time
4
  from langchain.llms import CTransformers
 
 
5
 
6
  model_repo = os.getenv('HF_MODEL_REPO')
7
  model_bin = os.getenv('HF_MODEL_BIN')
8
  llm_config = {
9
- 'max_new_tokens': 256,
10
  'temperature': 0.8,
11
  'top_p': 0.5,
12
  'repetition_penalty': 1.1,
13
  }
14
 
15
- llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config, lib="avx2")
 
 
 
 
 
 
16
 
17
  def response(prompt):
18
- txt = llm(prompt)
19
  return txt
20
 
21
  if __name__ == '__main__':
 
2
  import gradio as gr
3
  #import time
4
  from langchain.llms import CTransformers
5
+ from langchain.cache import InMemoryCache
6
+ import langchain
7
 
8
  model_repo = os.getenv('HF_MODEL_REPO')
9
  model_bin = os.getenv('HF_MODEL_BIN')
10
  llm_config = {
11
+ 'max_new_tokens': 2048,
12
  'temperature': 0.8,
13
  'top_p': 0.5,
14
  'repetition_penalty': 1.1,
15
  }
16
 
17
+ llm = CTransformers(
18
+ model=model_repo,
19
+ model_file=model_bin,
20
+ config=llm_config,
21
+ lib="avx2"
22
+ )
23
+ langchain.llm_cache = InMemoryCache()
24
 
25
  def response(prompt):
26
+ txt = llm(prompt, reset=True, threads=2)
27
  return txt
28
 
29
  if __name__ == '__main__':