Mattral committed on
Commit ff66b46 · verified · 1 Parent(s): 5b2acf4

Update app.py

Files changed (1)
  1. app.py +11 -10
app.py CHANGED
@@ -26,16 +26,17 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
 print("loading the LLM......................................")
 
-# llm = LlamaCpp(
-#     model_path="./llama-2-7b-chat.Q3_K_S.gguf",
-#     temperature = 0.2,
-#     n_ctx=2048,
-#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
-#     max_tokens = 500,
-#     callback_manager=callback_manager,
-#     verbose=True,
-# )
+llm = LlamaCpp(
+    model_path="./llama-2-7b-chat.Q3_K_S.gguf",
+    temperature = 0.2,
+    n_ctx=2048,
+    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
+    max_tokens = 500,
+    callback_manager=callback_manager,
+    verbose=True,
+)
 
+'''
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
                                            model_file="llama-2-7b-chat.Q3_K_S.gguf",
                                            model_type="llama",
@@ -44,7 +45,7 @@ llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
                                            max_new_tokens = 300,
                                            )
 
-
+'''
 
 print("LLM loaded........................................")
 print("################################################################")