File size: 12,697 Bytes
41f5990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06bc344
41f5990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗

import streamlit as st
import requests
from PIL import Image
import io
import base64
import time
import uuid
import json
from gtts import gTTS
import os
from litellm import completion
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before the UI reads
# OPENAI_API_KEY / OPENAI_API_BASE via os.getenv.
load_dotenv()

def compress_image(image, max_size=(800, 800), quality=95):
    """Return *image* shrunk to fit *max_size* and encoded as JPEG bytes.

    The caller's image is never modified; all work happens on a copy.

    Args:
        image: PIL image to compress.
        max_size: ``(width, height)`` bounding box handed to ``thumbnail``.
        quality: JPEG quality setting handed to ``save``.

    Returns:
        bytes: The JPEG-encoded image data.
    """
    # JPEG cannot store alpha or palette data, so saving e.g. an RGBA or P
    # image (typical for PNG uploads) fails. Modes the encoder accepts are
    # left untouched so their output stays as before.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode in jpeg_modes:
        working = image.copy()
    else:
        # convert() already returns a new image, so no extra copy is needed.
        working = image.convert("RGB")
    working.thumbnail(max_size)
    buffered = io.BytesIO()
    working.save(buffered, format="JPEG", quality=quality)
    return buffered.getvalue()

def analyze_image(image, api_key, model):
    """Ask a vision-capable model for a detailed English description of *image*.

    The image is compressed to JPEG, base64-encoded and sent inline as a
    ``data:`` URL together with a fixed English prompt.

    Args:
        image: PIL image to describe.
        api_key: Accepted for interface compatibility; it is not forwarded to
            ``completion`` here.
            NOTE(review): credentials are presumably picked up from the
            environment by litellm - confirm.
        model: Model identifier passed to ``completion``.

    Returns:
        str: The model's reply with surrounding whitespace removed.

    Raises:
        ValueError: If the reply carries no text content.
    """
    compressed_image = compress_image(image)
    img_str = base64.b64encode(compressed_image).decode()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Carefully observe this image and describe it in as much detail as possible. Please address the following aspects: primary subject matter, background setting, color palette, emotional conveyance, and specific details."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{img_str}"
                    }
                }
            ]
        }
    ]
    response = completion(model=model, messages=messages, max_tokens=1024)
    content = response.choices[0].message.content
    if content is None:
        # Fail with a readable message instead of an AttributeError on .strip().
        raise ValueError("vision model returned no text content")
    return content.strip()

def translate_to_chinese(text, api_key, model):
    """Translate English *text* into Traditional Chinese with a chat model.

    Args:
        text: English text to translate. Empty or whitespace-only input (or
            ``None``) is returned as ``""`` without contacting the model.
        api_key: Accepted for interface compatibility; it is not forwarded to
            ``completion`` here.
        model: Model identifier. When it contains ``"groq/"`` the translation
            is sent to ``groq/gemma2-9b-it`` instead.

    Returns:
        str: The translated text with surrounding whitespace removed.

    Raises:
        ValueError: If the reply carries no text content.
    """
    # Nothing to translate: skip the remote call, which would otherwise make
    # the model invent a reply for an empty prompt.
    if not text or not text.strip():
        return ""
    translation_model = "groq/gemma2-9b-it" if "groq/" in model else model
    messages = [
        {
            "role": "system",
            "content": "You are an expert translator proficient in both Traditional Chinese and English, with 40 years of translation experience and extensive cross-disciplinary knowledge. You have been deeply involved in the Chinese translations of The New York Times and Bloomberg, and have a deep understanding of the translation of current events and academic papers. I would like you to translate the following English text into Traditional Chinese, with a style similar to the Chinese versions of the aforementioned magazines. I would like to request a translation of the following English content into Traditional Chinese. Please ensure that the translation is accurate and natural-sounding."
        },
        {
            "role": "user",
            "content": f"THAT'S IMPORTANT OTHERWISE I'LL DIE. Translate the Text ``` {text} ``` into \"Traditional Chinese\". Must reply to me in Traditional Chinese."
        }
    ]
    response = completion(model=translation_model, messages=messages, max_tokens=1024)
    content = response.choices[0].message.content
    if content is None:
        # Fail with a readable message instead of an AttributeError on .strip().
        raise ValueError("translation model returned no text content")
    return content.strip()

def resize_image(image, target_height=400):
    """Return a copy of *image* scaled to *target_height*, keeping its aspect ratio.

    Args:
        image: PIL image; only its ``size`` and ``resize`` are used.
        target_height: Height of the result in pixels.

    Returns:
        A new image of size ``(width, target_height)`` resampled with
        ``Image.LANCZOS``, where ``width`` follows the original aspect ratio.
    """
    original_width, original_height = image.size
    aspect_ratio = original_width / original_height
    # Truncation yields 0 for very tall, narrow images, and a zero-width
    # resize is rejected; clamp to a single pixel instead of failing.
    target_width = max(1, int(target_height * aspect_ratio))
    return image.resize((target_width, target_height), Image.LANCZOS)

# Labels offered by the language radio button; also used as cache-key parts.
_LANG_ZH = "繁體中文"
_LANG_EN = "English"


def _inject_page_style():
    """Emit the page's custom CSS (plus an inline script tag) via ``st.markdown``."""
    # NOTE(review): the <script> element is injected as markdown HTML; confirm
    # the browser actually executes it, otherwise it has no effect.
    st.markdown("""
    <style>
    .stApp {
        background-image: linear-gradient(to bottom, #e6f3ff, #ffffff);
    }
    .stTitle, .stMarkdown, .stRadio, .stFileUploader, .stTextInput > label, p {
        color: black !important;
    }
    .stTitle h1 {
        color: black !important;
    }
    .stButton>button {
        background-color: #3498db;
        color: white;
    }
    .stTextInput>div>div>input {
        background-color: #ecf0f1;
        color: #2c3e50;
    }
    .custom-image-container {
        border: 2px solid #bdc3c7;
        border-radius: 10px;
        overflow: hidden;
    }
    .custom-image {
        width: 100%;
        height: 400px;
        object-fit: cover;
        border-radius: 10px;
    }
    .description-box {
        background-color: rgba(52, 152, 219, 0.1);
        border-left: 5px solid #3498db;
        padding: 12px;
        border-radius: 0 6px 6px 0;
        transition: all 0.3s ease;
        margin-bottom: 5px;
    }
    .description-box:hover {
        background-color: rgba(52, 152, 219, 0.2);
        box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
    }
    .description-box p {
        color: #2c3e50;
        font-size: 16px;
        line-height: 1.6;
        transition: all 0.3s ease;
    }
    .description-box:hover p {
        font-weight: bold;
    }
    .info-box {
        background-color: rgba(52, 152, 219, 0.1);
        border-left: 5px solid #3498db;
        padding: 10px;
        border-radius: 0 10px 10px 0;
        transition: all 0.3s ease;
        margin-bottom: 5px;
    }
    .info-box:hover {
        background-color: rgba(52, 152, 219, 0.2);
        box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
    }
    .info-box p {
        color: #2c3e50;
        font-size: 16px;
        line-height: 1.6;
        transition: all 0.3s ease;
        margin: 0;
    }
    .info-box:hover p {
        font-weight: bold;
    }
    .stTextInput > div > div > input {
        background-color: #ffffff;
        color: #2c3e50;
        border: 2px solid #3498db;
        border-radius: 5px;
        padding: 8px 12px;
    }
    .stButton > button {
        background-color: #3498db;
        color: white;
        border: none;
        border-radius: 5px;
        padding: 8px 16px;
        font-weight: bold;
        transition: all 0.3s ease;
    }
    .stButton > button:hover {
        background-color: #2980b9;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    }
    [data-testid=stSidebar] {
        background-color: #f0f8ff;
        padding: 20px;
    }
    [data-testid=stSidebar] .stTitle h1 {
        color: #2c3e50 !important;
        font-size: 24px;
        margin-bottom: 20px;
    }
    .main-content {
        padding-left: 0 !important;
    }
    .stColumns {
        gap: 1rem !important;
    }
    .streamlit-expanderHeader {
        background-color: #3498db;
        color: white !important;
        border-radius: 5px;
        padding: 10px 15px;
        font-weight: bold;
        transition: all 0.3s ease;
    }
    .streamlit-expanderHeader:hover {
        background-color: #2980b9;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    }
    .streamlit-expanderContent {
        border: 1px solid #3498db;
        border-radius: 0 0 5px 5px;
        padding: 10px;
    }
    </style>

    <script>
    const mutationObserver = new MutationObserver(function(mutations) {
        mutations.forEach(function(mutation) {
            if (mutation.type === 'childList') {
                const descriptionBoxes = document.querySelectorAll('.description-box');
                descriptionBoxes.forEach(box => {
                    const paragraphs = box.querySelectorAll('p');
                    paragraphs.forEach(p => {
                        p.textContent = p.textContent.replace(/^<strong>|<\/strong>$/g, '');
                    });
                });
            }
        });
    });

    mutationObserver.observe(document.body, {
        childList: true,
        subtree: true
    });
    </script>
    """, unsafe_allow_html=True)


def _render_sidebar():
    """Draw the sidebar and return ``(uploaded_files, language, model, api_key)``.

    The upload list is mirrored in ``st.session_state.uploaded_files``: files
    no longer present in the uploader are dropped, previously seen files keep
    their position, and newly added files are appended.
    """
    with st.sidebar:
        st.title("🖼️ 圖片分析")
        if 'uploaded_files' not in st.session_state:
            st.session_state.uploaded_files = []
        new_uploads = st.file_uploader("新增/刪除圖片", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
        current_files = {f.name: f for f in new_uploads} if new_uploads else {}
        kept = [f for f in st.session_state.uploaded_files if f.name in current_files]
        # One set lookup per file instead of rebuilding a name list each time.
        known_names = {f.name for f in kept}
        kept.extend(f for name, f in current_files.items() if name not in known_names)
        st.session_state.uploaded_files = kept
        with st.expander("詮釋圖片語言", expanded=False):
            language = st.radio("", [_LANG_ZH, _LANG_EN], index=0)
        st.markdown("### 🤖 Model Settings")
        model_options = ["gpt-4o", "gemini-1.5-pro", "gpt-4o-mini", "custom"]
        selected_model = st.selectbox("Select Model", model_options)
        if selected_model == "custom":
            custom_model = st.text_input("Enter custom model name")
            # An empty custom name falls back to a default vision model.
            model = custom_model if custom_model else "groq/llava-v1.5-7b-4096-preview"
        else:
            model = selected_model
        st.markdown("### 🔑 API Settings")
        api_key = st.text_input("API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""))
        api_base = st.text_input("API Base URL", value=os.getenv("OPENAI_API_BASE", ""))
        if st.button("Save API Settings"):
            # Only affects this process's environment; nothing is written to disk.
            os.environ["OPENAI_API_KEY"] = api_key
            os.environ["OPENAI_API_BASE"] = api_base
            st.success("API settings saved successfully")
        st.markdown("""
        <div class="info-box">
            <p>系統部署:江信宗<br>Vision Language Models</p>
        </div>
        """, unsafe_allow_html=True)
    return kept, language, model, api_key


def _prune_caches(current_names):
    """Create the per-session caches if missing and drop entries for removed files."""
    for cache_name in ("analyzed_files", "audio_cache"):
        if cache_name not in st.session_state:
            st.session_state[cache_name] = {}
        cache = st.session_state[cache_name]
        # Keys are (file name, language) tuples; anything else is a leftover
        # from an older key layout and is discarded as well.
        stale = [key for key in cache
                 if not isinstance(key, tuple) or key[0] not in current_names]
        for key in stale:
            del cache[key]


def _describe_image(file_name, image, language, model, api_key):
    """Return the description of *image* in *language*, using the session cache.

    Results are cached per ``(file name, language)`` so that switching the
    language never shows text in the other language. The English description
    is cached on its own and reused as the translation source.
    """
    cache = st.session_state.analyzed_files
    wanted = (file_name, language)
    if wanted in cache:
        return cache[wanted]
    english = (file_name, _LANG_EN)
    with st.spinner("分析圖片及生成語音中..."):
        if english not in cache:
            cache[english] = analyze_image(image, api_key, model)
        description = cache[english]
        # Skip translation for an empty description: nothing to translate.
        if language == _LANG_ZH and description:
            with st.spinner("翻譯中..."):
                description = translate_to_chinese(description, api_key, model)
        cache[wanted] = description
        # Brief pause after remote calls; presumably throttling - confirm.
        time.sleep(1)
    return description


def _speech_bytes(cache_key, description, language):
    """Return MP3 bytes for *description*, synthesizing them once per cache key."""
    cache = st.session_state.audio_cache
    if cache_key not in cache:
        buffer = io.BytesIO()
        # Written to memory, so no temporary file can be left behind on failure.
        gTTS(text=description, lang='zh-tw' if language == _LANG_ZH else 'en').write_to_fp(buffer)
        cache[cache_key] = buffer.getvalue()
    return cache[cache_key]


def _render_image_card(uploaded_file, language, model, api_key):
    """Render one image with its file name, description box and audio player."""
    import html  # stdlib; imported here so this block needs no file-level change

    image = Image.open(uploaded_file)
    preview = io.BytesIO()
    resize_image(image).save(preview, format="PNG")
    img_str = base64.b64encode(preview.getvalue()).decode()
    # The name is user-controlled and lands in raw HTML, so escape it.
    safe_name = html.escape(uploaded_file.name)
    st.markdown(f"""
    <div class="custom-image-container">
        <img src="data:image/png;base64,{img_str}" class="custom-image">
    </div>
    <p style="text-align: center; color: black;">{safe_name}</p>
    """, unsafe_allow_html=True)

    try:
        description = _describe_image(uploaded_file.name, image, language, model, api_key)
    except Exception as e:
        # UI boundary: report the failure and leave the other images alone.
        st.error(f"處理圖片時發生錯誤: {str(e)}")
        return

    paragraphs = [p.strip() for p in description.split('\n') if p.strip()]
    if not paragraphs:
        st.warning("無法獲取圖片描述。")
        return

    # Model output is untrusted text; escape it before embedding it as HTML.
    formatted_description = ''.join(
        f'<p style="margin: 0;">{html.escape(p)}</p>' for p in paragraphs
    )
    st.markdown(f'<div class="description-box">{formatted_description}</div>', unsafe_allow_html=True)
    try:
        audio = _speech_bytes((uploaded_file.name, language), description, language)
    except Exception as e:
        # Speech is optional: a failure must not take the page down.
        st.warning(f"語音生成失敗: {str(e)}")
    else:
        st.audio(audio, format="audio/mpeg")


def main():
    """Build the Streamlit page: sidebar settings plus a two-column image gallery.

    For every uploaded image the page shows a resized preview, a description
    (translated to Traditional Chinese when that language is selected) and a
    spoken version of the description. Descriptions and audio are cached in
    ``st.session_state`` so reruns do not repeat the remote calls.
    """
    st.set_page_config(
        layout="wide",
        page_title="AI-Powered Visual Storytelling",
        page_icon="🖼️",
        menu_items={
            'Get Help': None,
            'Report a bug': None,
            'About': '# 圖片AI辨識應用\n使用AI分析圖片內容之網頁程式。'
        }
    )
    _inject_page_style()
    uploaded_files, language, model, api_key = _render_sidebar()

    st.markdown('<div class="main-content">', unsafe_allow_html=True)
    st.title("🌄 AI-Powered Visual Storytelling")
    if api_key and uploaded_files:
        _prune_caches({f.name for f in uploaded_files})
        # Lay the images out two per row.
        for row_start in range(0, len(uploaded_files), 2):
            columns = st.columns(2)
            row_files = uploaded_files[row_start:row_start + 2]
            for column, uploaded_file in zip(columns, row_files):
                with column:
                    _render_image_card(uploaded_file, language, model, api_key)
    elif uploaded_files:
        st.warning("請輸入有效的 API Key 以分析圖片。")
    st.markdown('</div>', unsafe_allow_html=True)

# Build the page only when this module is executed as the main script,
# not when it is imported.
if __name__ == "__main__":
    main()