import gradio as gr
import json
import requests

class Chatbot:
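    """A Socratic-style tutoring chatbot grounded in a single video.

    Serializes the video's transcript and key moments into a system prompt,
    then relays each turn to one of three backends: 'jutor' (a hosted
    OpenAI-style chat endpoint), 'groq', or 'claude3' (via AWS Bedrock).

    Expected config keys, as read below: 'video_id', 'content_subject',
    'content_grade', 'jutor_chat_key', 'transcript', 'key_moments',
    'ai_name', 'ai_client'.
    """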
    def __init__(self, config):
        self.video_id = config.get('video_id')
        self.content_subject = config.get('content_subject')
        self.content_grade = config.get('content_grade')
        self.jutor_chat_key = config.get('jutor_chat_key')
        self.transcript_text = self.get_transcript_text(config.get('transcript'))
        self.key_moments_text = self.get_key_moments_text(config.get('key_moments'))
        self.ai_name = config.get('ai_name')
        self.ai_client = config.get('ai_client')

    def get_transcript_text(self, transcript_data):
        """Serialize the transcript as JSON text, dropping per-entry end times."""
        if isinstance(transcript_data, str):
            transcript_json = json.loads(transcript_data)
        else:
            transcript_json = transcript_data
        for entry in transcript_json:
            entry.pop('end_time', None)
        return json.dumps(transcript_json, ensure_ascii=False)

    def get_key_moments_text(self, key_moments_data):
        """Serialize key moments as JSON text, stripping images, end times, and raw transcripts."""
        if isinstance(key_moments_data, str):
            key_moments_json = json.loads(key_moments_data)
        else:
            key_moments_json = key_moments_data
        for moment in key_moments_json:
            moment.pop('images', None)
            moment.pop('end', None)
            moment.pop('transcript', None)
        return json.dumps(key_moments_json, ensure_ascii=False)

    def chat(self, user_message, chat_history, socratic_mode=False, service_type='jutor'):
        messages = self.prepare_messages(chat_history, user_message)
        system_prompt = self.prepare_system_prompt(socratic_mode)
        if service_type in ['jutor', 'groq', 'claude3']:
            response_text = self.chat_with_service(service_type, system_prompt, messages)
            return response_text
        else:
            raise gr.Error("不支持此服務")

    def prepare_system_prompt(self, socratic_mode):
        content_subject = self.content_subject
        content_grade = self.content_grade
        key_moments_text = self.key_moments_text
        socratic_mode = str(socratic_mode)

        system_prompt = f"""
            subject: {content_subject}
            grade: {content_grade}
            context: {key_moments_text}
            Assistant Role: you are a {content_subject} teacher
            User Role: a {content_grade}th-grade student
            Socratic Mode: {socratic_mode}
            Method: Socratic style; guide the student's thinking and do not give direct answers. This is very important, please follow it strictly.
            Language: Traditional Chinese zh-TW (this is very important), suitable for a {content_grade}th-grade reading level.
            Response: a single question under 100 characters; use math symbols (LaTeX rendered with $) and include a hint with a video timestamp in the format 【參考:00:00:00】.
            Occasionally encourage the user in a Taiwanese style.
            If the user asks a question not covered by the context,
            ask them to stay within the context and offer an example question.
            Restrictions: answer only within the video content; no external references.
        """
        print("====system_prompt====")
        print(system_prompt)

        return system_prompt

    def prepare_messages(self, chat_history, user_message):
        messages = []
        if chat_history is not None:
            # Keep only the last 10 exchanges to bound prompt length.
            if len(chat_history) > 10:
                chat_history = chat_history[-10:]

            for user_msg, assistant_msg in chat_history:
                if user_msg:
                    messages.append({"role": "user", "content": user_msg})
                if assistant_msg:
                    messages.append({"role": "assistant", "content": assistant_msg})

        if user_message:
            # Appended instruction (zh-TW): always answer in Traditional Chinese with a
            # polite Taiwanese tone (without pointing that out), say「內容」instead of
            # 「逐字稿」, and render math as LaTeX delimited by $.
            user_message += "\n (請一定要用繁體中文回答 zh-TW,並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,不用提到「逐字稿」這個詞,用「內容」代替),回答時請用數學符號代替文字(Latex 用 $ 字號 render)"
            messages.append({"role": "user", "content": user_message})
        return messages

    def chat_with_service(self, service_type, system_prompt, messages):
        if service_type == 'jutor':
            return self.chat_with_jutor(system_prompt, messages)
        elif service_type == 'groq':
            return self.chat_with_groq(system_prompt, messages)
        elif service_type == 'claude3':
            return self.chat_with_claude3(system_prompt, messages)
        else:
            raise gr.Error("不支持的服务类型")

    def chat_with_jutor(self, system_prompt, messages):
        """Call the hosted jutor chat endpoint with an OpenAI-style payload."""
        messages.insert(0, {"role": "system", "content": system_prompt})
        api_endpoint = "https://ci-live-feat-video-ai-dot-junyiacademy.appspot.com/api/v2/jutor/hf-chat"
        headers = {
            "Content-Type": "application/json",
            "x-api-key": self.jutor_chat_key,
        }
        model = "gpt-4-1106-preview"
        # model = "gpt-3.5-turbo-0125"
        data = {
            "data": {
                "messages": messages,
                "max_tokens": 512,
                "temperature": 0.9,
                "model": model,
                "stream": False,
            }
        }

        response = requests.post(api_endpoint, headers=headers, json=data)
        response.raise_for_status()
        response_data = response.json()
        response_completion = response_data['data']['choices'][0]['message']['content'].strip()
        return response_completion

    def chat_with_groq(self, system_prompt, messages):
        # Prepend the system prompt to the message list.
        messages.insert(0, {"role": "system", "content": system_prompt})
        request_payload = {
            "model": "mixtral-8x7b-32768",
            "messages": messages,
            "max_tokens": 500,  # a generous cap; adjust as needed
        }
        groq_client = self.ai_client
        response = groq_client.chat.completions.create(**request_payload)
        response_completion = response.choices[0].message.content.strip()
        return response_completion

    def chat_with_claude3(self, system_prompt, messages):
        """Call Claude 3 through an AWS Bedrock runtime client."""
        if not system_prompt.strip():
            raise ValueError("System prompt cannot be empty")

        model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
        # model_id = "anthropic.claude-3-haiku-20240307-v1:0"
        kwargs = {
            "modelId": model_id,
            "contentType": "application/json",
            "accept": "application/json",
            "body": json.dumps({
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 500,
                "system": system_prompt,
                "messages": messages
            })
        }
        print(messages)
        # Invoke the Bedrock messages API and read the response body.
        bedrock_client = self.ai_client
        response = bedrock_client.invoke_model(**kwargs)
        response_body = json.loads(response.get('body').read())
        response_completion = response_body.get('content')[0].get('text').strip()
        return response_completion
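
# A minimal usage sketch, not part of the original file: it wires Chatbot.chat
# into a basic gr.ChatInterface. All values below (video id, subject, grade,
# API key, transcript, key moments) are hypothetical placeholders; a real run
# against the 'jutor' backend needs a valid jutor_chat_key.
if __name__ == "__main__":
    demo_config = {
        "video_id": "demo-video-001",            # hypothetical
        "content_subject": "math",               # hypothetical
        "content_grade": "8",                    # hypothetical
        "jutor_chat_key": "YOUR_JUTOR_API_KEY",  # placeholder secret
        "transcript": json.dumps([
            {"start_time": "00:00:01", "end_time": "00:00:05", "text": "..."},
        ]),
        "key_moments": json.dumps([
            {"start": "00:00:01", "end": "00:00:30", "text": "...", "images": []},
        ]),
        "ai_name": "demo-tutor",
        "ai_client": None,  # only needed for the 'groq' / 'claude3' backends
    }
    bot = Chatbot(demo_config)

    def respond(message, history):
        # Assumes tuple-style history, i.e. [(user, assistant), ...] pairs,
        # which is the format prepare_messages expects.
        return bot.chat(message, history, socratic_mode=True, service_type="jutor")

    gr.ChatInterface(respond).launch()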