AlanXian committed
Commit 40103ce · 1 Parent(s): 33032fd

update message format

Files changed (1)
  1. app.py +20 -20
app.py CHANGED
@@ -51,6 +51,14 @@ h1 {
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/Apollo-7B")
 
+chat = [
+    {"role": "user", "content": "Hello, how are you?"},
+    {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
+    {"role": "user", "content": "I'd like to show off how chat templating works!"},
+]
+
+tokenizer.apply_chat_template(chat, tokenize=False)
+
 model = AutoModelForCausalLM.from_pretrained("FreedomIntelligence/Apollo-7B", device_map="auto")  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
@@ -58,39 +66,31 @@ terminators = [
 ]
 
 @spaces.GPU(duration=120)
-def chat_llama3_8b(conversation_data: dict,
+def chat_llama3_8b(message: str,
+                   history: list,
                    temperature: float,
                    max_new_tokens: int
                    ) -> str:
     """
     Generate a streaming response using the llama3-8b model.
     Args:
-        conversation_data (dict): A dictionary containing 'text' and 'history'.
+        message (str): The input message.
+        history (list): The conversation history used by ChatInterface.
         temperature (float): The temperature for generating the response.
         max_new_tokens (int): The maximum number of new tokens to generate.
     Returns:
         str: The generated response.
     """
-    message = conversation_data.get("text", "")
-    history_str = conversation_data.get("history", "")
-
-    # Parse the conversation history
-    conversation = []
-    if history_str:
-        # The history is assumed to be stored as a string in some format that must be parsed accordingly;
-        # here it is taken as newline-separated user/assistant messages: even lines are user, odd lines are assistant
-        lines = history_str.strip().split('\n')
-        for i in range(0, len(lines), 2):
-            if i+1 < len(lines):
-                user_msg = lines[i]
-                assistant_msg = lines[i+1]
-                conversation.extend([
-                    {"role": "user", "content": user_msg},
-                    {"role": "assistant", "content": assistant_msg}
-                ])
+    # Rebuild the conversation as a flat list of role/content messages
+    conversation = []
+    for user, assistant in history:
+        conversation.extend([
+            {"role": "user", "content": user},
+            {"role": "assistant", "content": assistant}
+        ])
 
-    # Append the latest user message
+    # Append the current user message
     conversation.append({"role": "user", "content": message})
 
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
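
For context, the updated chat_llama3_8b follows the (message, history) calling convention that Gradio's ChatInterface uses, which the new docstring references. Below is a minimal sketch of how such a handler is typically wired up; it is not part of this commit, and the slider labels and ranges are illustrative assumptions rather than values taken from this repo.

import gradio as gr

# Hypothetical wiring for the updated handler: ChatInterface supplies
# (message, history) automatically and appends the additional inputs
# in order, matching the temperature and max_new_tokens parameters.
demo = gr.ChatInterface(
    fn=chat_llama3_8b,
    additional_inputs=[
        gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature"),  # assumed range
        gr.Slider(minimum=128, maximum=4096, value=1024, step=128, label="Max new tokens"),  # assumed range
    ],
)
demo.launch()

# With tuple-style history such as [("Hi", "Hello!")], the loop in the new
# code flattens it to:
#   [{"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "<current message>"}]
# which is the list-of-messages format tokenizer.apply_chat_template expects.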