cdancy committed on
Commit a5cb7d9 · 1 Parent(s): e0be22c

Updates of defaults and GPU resource change

Files changed (1)
  1. app.py +7 -3
app.py CHANGED
@@ -8,8 +8,10 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 512
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+DEFAULT_SYS_PROMPT = """\
+"""
 
 DESCRIPTION = """\
 # Test Chat Information System for MEPO Summer Bridge 2024 courtesy of Dr. Dancy & THiCC Lab
@@ -25,6 +27,8 @@ As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-
 this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
 """
 
+default
+
 if not torch.cuda.is_available():
     DESCRIPTION += "We won't be able to run this space! We need GPU processing"
 
@@ -36,12 +40,12 @@ if torch.cuda.is_available():
     tokenizer.use_default_system_prompt = False
 
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
-    max_new_tokens: int = 1024,
+    max_new_tokens: int = MAX_MAX_NEW_TOKENS,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
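
For context on the GPU resource change: @spaces.GPU(duration=120) is the decorator from Hugging Face's spaces package for ZeroGPU hardware, and the duration argument requests a GPU slot of up to 120 seconds per call. Below is a minimal sketch of how the updated defaults and the decorator fit together; the model id, dtype, and streaming loop are illustrative assumptions, not part of this commit.

# Minimal sketch, not the Space's actual app.py: shows the new defaults and the
# ZeroGPU decorator from this commit in context. Model id, dtype, and the
# streaming loop are assumptions for illustration only.
import os
from threading import Thread

import spaces  # Hugging Face ZeroGPU helper providing the @spaces.GPU decorator
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512  # default lowered from 1024 in this commit
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

model_id = "meta-llama/Llama-2-7b-chat-hf"  # assumption: base model behind the Space
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)


@spaces.GPU(duration=120)  # request a ZeroGPU slot for up to 120 seconds per call
def generate(message: str, max_new_tokens: int = MAX_MAX_NEW_TOKENS) -> str:
    # Truncate the prompt to the configured input budget before generating.
    input_ids = tokenizer(message, return_tensors="pt").input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation in a background thread and drain the streamer for the full reply.
    Thread(
        target=model.generate,
        kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens),
    ).start()
    return "".join(streamer)

Lowering DEFAULT_MAX_NEW_TOKENS to 512 presumably keeps typical responses within the 120-second GPU allocation, while the generate() signature's default of MAX_MAX_NEW_TOKENS still permits requests of up to 2048 new tokens.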