import json
from huggingface_hub import InferenceClient
import re

from config import MODEL_NAME
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()
api_key = os.getenv("HUGGINGFACE_TOKEN")
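# Optional guard (a suggested addition, not part of the original script): fail fast
# with a clear error if the token is missing instead of an opaque 401 later.
if not api_key:
    raise RuntimeError("HUGGINGFACE_TOKEN is not set; add it to your .env file.")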

# Prompt configuration
SYSTEM_PROMPT = "你是一位精通英语和中文的教学专家,专门帮助词汇量约为8000的语言学习者将中文翻译成地道的英语表达,并提取重要词汇,比如形容词、名词和动词,提供解释和用法说明。只输出JSON格式的数据,确保JSON完整且正确,不要在开头添加任何额外内容,并注意JSON格式与控制字符(control character)的转义。"

PROMPT_TEMPLATE = """请将以下中文翻译成英语,并提取重要词汇,比如,形容词,名词和动词,针对每个单词,提供解释和简单的造句说明:
    中文文本:
    <text>
    {text}
    </text>
    
    请按照以下JSON格式返回:
    {{
        "english": "英语翻译",
        "important_words": [
            {{
                "word_en": "重要英语单词或短语",
                "meaning_ch": "中文含义",
                "usage": "用法说明"
            }}
        ]
    }}
"""

USER_SHOT_1 = "我昨天去公园散步,看到很多人在那里锻炼身体"
SHOT_1_RESPONSE = """
{
  "english": "I went for a walk in the park yesterday and saw many people exercising there.",
  "important_words": [
    {
      "word_en": "exercise",
      "meaning_ch": "锻炼身体",
      "usage": "e.g. I go to the gym to exercise every day."
    },
    {
      "word_en": "park",
      "meaning_ch": "公园",
      "usage": "e.g. I took a walk in the park on Sunday."
    },
    {
      "word_en": "scatter",
      "meaning_ch": "散步",
      "usage": "e.g. I like to scatter around the city to explore new places."
    }
  ]
}
"""

USER_SHOT_2 = "我昨天吃坏肚子了,一直在拉肚子。"
SHOT_2_RESPONSE = """{
  "english": "I had a stomachache yesterday and have been experiencing diarrhea ever since.",
  "important_words": [
    {
      "word_en": "stomachache",
      "meaning_ch": "肚子痛",
      "usage": "Example sentence: I woke up with a stomachache and couldn't eat anything."
    },
    {
      "word_en": "diarrhea",
      "meaning_ch": "腹泻",
      "usage": "Example sentence: She had diarrhea after eating spoiled food."
    }
  ]
}
"""


def message_builder(text):
    """构建消息,包含few-shot示例"""
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=USER_SHOT_1)
        },
        {
            "role": "assistant",
            "content": SHOT_1_RESPONSE
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=USER_SHOT_2)
        },
        {
            "role": "assistant",
            "content": SHOT_2_RESPONSE
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=text)
        }
    ]
    return messages


client = InferenceClient(api_key=api_key)


def get_llm_response(text) -> str:
    """调用LLM获取响应"""

    print("get_llm_response")

    messages = message_builder(text)

    llm_response = client.chat.completions.create(
        model=MODEL_NAME, messages=messages, max_tokens=1024, temperature=0.3)

    return llm_response.choices[0].message.content
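

# --- Illustrative helper (a sketch, not part of the original script) ---------
# The prompts above ask the model to return pure JSON, so a caller will usually
# want to turn the raw string from get_llm_response() into a dict. The helper
# name and the regex fallback are assumptions for demonstration only.
def parse_llm_response(raw: str) -> dict:
    """Parse the model output into a dict, tolerating stray text around the JSON."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Fall back to the first {...} block in case the model wrapped the JSON
        # in extra prose or code fences.
        match = re.search(r"\{.*\}", raw, re.DOTALL)
        if match:
            return json.loads(match.group(0))
        raise
# Example usage: data = parse_llm_response(get_llm_response("我昨天去公园散步"))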


# main
if __name__ == "__main__":
    response = get_llm_response("我昨天去公园散步,看到很多人在那里锻炼身体")
    print(response)

    # import pprint
    # pprint.pprint(response)