cdancy committed on
Commit a5cb7d9 · 1 Parent(s): e0be22c

Updates of defaults and GPU resource change

Files changed (1)
  1. app.py +7 -3
app.py CHANGED
@@ -8,8 +8,10 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 512
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+DEFAULT_SYS_PROMPT = """\
+"""
 
 DESCRIPTION = """\
 # Test Chat Information System for MEPO Summer Bridge 2024 courtesy of Dr. Dancy & THiCC Lab
@@ -25,6 +27,8 @@ As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-
 this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
 """
 
+default
+
 if not torch.cuda.is_available():
     DESCRIPTION += "We won't be able to run this space! We need GPU processing"
 
@@ -36,12 +40,12 @@ if torch.cuda.is_available():
     tokenizer.use_default_system_prompt = False
 
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
-    max_new_tokens: int = 1024,
+    max_new_tokens: int = MAX_MAX_NEW_TOKENS,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
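
For context on the GPU resource change: @spaces.GPU(duration=120) is the decorator from Hugging Face's spaces package for ZeroGPU hardware, and the duration argument requests a GPU slot of up to 120 seconds per call. Below is a minimal sketch of how the updated defaults and the decorator fit together; the model id, dtype, and streaming loop are illustrative assumptions, not part of this commit.

# Minimal sketch, not the Space's actual app.py: shows the new defaults and the
# ZeroGPU decorator from this commit in context. Model id, dtype, and the
# streaming loop are assumptions for illustration only.
import os
from threading import Thread

import spaces  # Hugging Face ZeroGPU helper providing the @spaces.GPU decorator
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512  # default lowered from 1024 in this commit
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

model_id = "meta-llama/Llama-2-7b-chat-hf"  # assumption: base model behind the Space
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)


@spaces.GPU(duration=120)  # request a ZeroGPU slot for up to 120 seconds per call
def generate(message: str, max_new_tokens: int = MAX_MAX_NEW_TOKENS) -> str:
    # Truncate the prompt to the configured input budget before generating.
    input_ids = tokenizer(message, return_tensors="pt").input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation in a background thread and drain the streamer for the full reply.
    Thread(
        target=model.generate,
        kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens),
    ).start()
    return "".join(streamer)

Lowering DEFAULT_MAX_NEW_TOKENS to 512 presumably keeps typical responses within the 120-second GPU allocation, while the generate() signature's default of MAX_MAX_NEW_TOKENS still permits requests of up to 2048 new tokens.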