Update app.py
app.py CHANGED

```diff
@@ -1,29 +1,21 @@
 import os
 import gradio as gr
-
-from langchain.llms import CTransformers
-from langchain.cache import InMemoryCache
-import langchain
+from ctransformers import AutoModelForCausalLM
 
 model_repo = os.getenv('HF_MODEL_REPO')
 model_bin = os.getenv('HF_MODEL_BIN')
-llm_config = {
-    'max_new_tokens': 400,
-    'temperature': 0.8,
-    'top_p': 0.5,
-    'repetition_penalty': 1.1,
-}
 
-llm = CTransformers(
+llm = AutoModelForCausalLM.from_pretrained(
     model=model_repo,
     model_file=model_bin,
-    config=llm_config,
-
+    threads=2,
+    seed=42,
+    context_length=16384,
+    lib="avx2",
 )
-langchain.llm_cache = InMemoryCache()
 
 def response(prompt):
-    txt = llm(prompt, reset=False)
+    txt = llm(prompt, max_new_tokens=8192, temperature=0.8, top_p=0.5, repetition_penalty=1.1, reset=False, stop=["</s>","<|im_end|>"], )
     return txt
 
 if __name__ == '__main__':
```
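The new version drops the LangChain `CTransformers` wrapper and its `InMemoryCache`, loads the quantized binary directly through `ctransformers`, and moves the sampling parameters from the deleted `llm_config` dict into the generation call itself. The hunk ends at `if __name__ == '__main__':`, so the Gradio wiring is not shown. Below is a minimal runnable sketch of the same pattern; the fallback repo/file names and the `gr.Interface` block are illustrative assumptions, not part of the commit. One detail worth noting: in the `ctransformers` releases I am aware of, `AutoModelForCausalLM.from_pretrained` takes the repo id or local path as its first positional argument (`model_path_or_repo_id`) rather than a `model=` keyword, so the sketch passes it positionally.

```python
import os

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Same pattern as the commit: repo and model file come from Space secrets.
# The fallback values here are illustrative assumptions only.
model_repo = os.getenv('HF_MODEL_REPO', 'TheBloke/zephyr-7B-beta-GGUF')
model_bin = os.getenv('HF_MODEL_BIN', 'zephyr-7b-beta.Q4_K_M.gguf')

llm = AutoModelForCausalLM.from_pretrained(
    model_repo,            # first positional arg: repo id or local path
    model_file=model_bin,  # which quantized binary to load from the repo
    threads=2,             # CPU threads used for inference
    seed=42,               # fixed seed for reproducible sampling
    context_length=16384,  # how many tokens of context the model keeps
    lib="avx2",            # select the AVX2 CPU backend
)

def response(prompt):
    # Generation parameters mirror the commit: reset=False keeps the KV cache
    # across calls, and the stop sequences end generation at chat turn markers.
    return llm(
        prompt,
        max_new_tokens=8192,
        temperature=0.8,
        top_p=0.5,
        repetition_penalty=1.1,
        reset=False,
        stop=["</s>", "<|im_end|>"],
    )

if __name__ == '__main__':
    # Hypothetical wiring for the part of app.py the hunk does not show.
    demo = gr.Interface(fn=response, inputs='text', outputs='text')
    demo.launch()
```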