File size: 2,151 Bytes
109a0c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94c4923
109a0c8
 
 
 
 
94c4923
2792ede
94c4923
109a0c8
 
94c4923
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aeaf225
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import re
from typing import List, Optional, Union
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from api_types import ChatMessage


def parse_think_response(full_response: str):
    """Split a model response into its reasoning text and its final answer.

    The reasoning is whatever sits between the first ``<think`` opening
    marker and the next ``</think>`` closing tag. Any text that precedes the
    opening marker is discarded.

    Args:
        full_response: Raw text produced by the model.

    Returns:
        A ``(reasoning, content)`` tuple:

        * no ``<think`` marker: ``(None, full_response.strip())``;
        * marker present but never closed: ``(reasoning, "")`` where the
          reasoning is everything after the opening tag;
        * marker closed: the stripped text inside the tags and the stripped
          text that follows ``</think>``.
    """
    open_marker = "<think"
    close_tag = "</think>"

    think_start = full_response.find(open_marker)
    if think_start == -1:
        return None, full_response.strip()

    # Step over the opening marker, including the ">" that terminates it, so
    # the reasoning text does not start with a stray ">".
    body_start = think_start + len(open_marker)
    if full_response.startswith(">", body_start):
        body_start += 1

    # Only look for the closing tag after the opening one; a "</think>" that
    # appears earlier in the text does not close this block.
    think_end = full_response.find(close_tag, body_start)
    if think_end == -1:  # Unclosed block: everything left is reasoning.
        reasoning = full_response[body_start:]
        content = ""
    else:
        reasoning = full_response[body_start:think_end]
        # Use the real tag length so no answer character is skipped.
        content = full_response[think_end + len(close_tag):].strip()

    # Drop any leftover tag markers (e.g. a nested opening tag), keep the text.
    reasoning_content = (
        reasoning.replace("<think>", "")
        .replace(open_marker, "")
        .replace(close_tag, "")
        .strip()
    )
    return reasoning_content, content


def cleanMessages(messages: List[ChatMessage], removeThinkingContent: bool = False):
    """Render chat messages as one ``Role: content`` prompt string.

    For every message the role is stripped, lower-cased and capitalized, and
    the content is stripped with runs of newlines collapsed to a single
    newline. The rendered messages are joined by a blank line.

    Args:
        messages: Messages to render; each is read through its ``role`` and
            ``content`` attributes (both assumed to be strings).
        removeThinkingContent: When true, ``<think>...</think>`` blocks are
            removed from the content of assistant messages.

    Returns:
        The rendered prompt string (empty when ``messages`` is empty).
    """
    promptStrList = []

    for message in messages:
        role = message.role.strip().lower().capitalize()
        content = re.sub(r"\n+", "\n", message.content.strip())
        # Compare the normalized role, so "assistant" or " Assistant " is
        # treated the same as "Assistant" when deciding whether to strip.
        if removeThinkingContent and role == "Assistant":
            content = remove_nested_think_tags_stack(content)
        promptStrList.append(f"{role}: {content}")

    return "\n\n".join(promptStrList)


def remove_nested_think_tags_stack(text):
    """Return ``text`` with every ``<think>...</think>`` region removed.

    Regions may nest; text is kept only while no ``<think>`` is open. A
    ``</think>`` with no matching opener is kept verbatim, and an opener that
    is never closed swallows the rest of the text.
    """
    # Splitting on a capturing group keeps the tags: even indices hold plain
    # text, odd indices hold the "<think>" / "</think>" tags themselves.
    pieces = re.split(r"(<think>|</think>)", text)

    kept = []
    depth = 0  # number of currently open <think> tags
    for index, piece in enumerate(pieces):
        is_tag = index % 2 == 1
        if not is_tag:
            if depth == 0:
                kept.append(piece)
            continue
        if piece == "<think>":
            depth += 1
        elif depth > 0:
            depth -= 1
        else:
            # Unmatched closing tag: pass it through untouched.
            kept.append(piece)
    return "".join(kept)


def format_bytes(size):
    """Format a byte count as a string using binary (1024-based) units.

    Args:
        size: Number of bytes.

    Returns:
        The value scaled to the largest fitting unit among B, KB, MB, GB and
        TB, with four decimal places, e.g. ``"1.5000KB"``. Values beyond the
        TB range stay expressed in TB.
    """
    power = 2**10
    power_labels = ("", "K", "M", "G", "T")
    last_label = len(power_labels) - 1

    n = 0
    # ">=" so exact powers (1024 -> 1 KB) roll over to the next unit; the
    # bound on n keeps very large sizes from indexing past the label table.
    while size >= power and n < last_label:
        size /= power
        n += 1
    return f"{size:.4f}{power_labels[n]+'B'}"