# Spaces: Runtime error
# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗 (Fiscal Information Agency, Ministry of Finance)
import base64
import html
import io
import json
import os
import time
import uuid

import requests
import streamlit as st
from dotenv import load_dotenv
from gtts import gTTS
from litellm import completion
from PIL import Image
# Load settings from a local .env file (if any) into the process environment
# before the UI reads OPENAI_API_KEY / OPENAI_API_BASE via os.getenv().
load_dotenv()
def compress_image(image, max_size=(800, 800), quality=95):
    """Return a downscaled JPEG encoding of *image* as bytes.

    The input image is not modified; a copy is shrunk in place so that it
    fits inside ``max_size`` and is then encoded as JPEG.

    Args:
        image: Source image object (used via ``copy``/``thumbnail``/``save``).
        max_size: ``(width, height)`` bounding box passed to ``thumbnail``.
        quality: JPEG quality passed to ``save``.

    Returns:
        The JPEG file contents as ``bytes``.
    """
    img_copy = image.copy()
    img_copy.thumbnail(max_size)
    # JPEG cannot store an alpha channel or a palette, so saving e.g. an
    # RGBA or P-mode PNG upload would raise OSError. Normalise to RGB first.
    if img_copy.mode not in ("RGB", "L"):
        img_copy = img_copy.convert("RGB")
    buffered = io.BytesIO()
    img_copy.save(buffered, format="JPEG", quality=quality)
    return buffered.getvalue()
def analyze_image(image, api_key, model):
    """Ask a vision-capable model for a detailed English description of *image*.

    The image is compressed to JPEG, base64-encoded and sent inline as a
    ``data:`` URL together with a fixed text prompt.

    Args:
        image: Image object accepted by ``compress_image``.
        api_key: Currently unused by this function; the request is issued
            without an explicit key.
            NOTE(review): presumably the key is picked up from the environment
            after "Save API Settings" - confirm this is intended.
        model: Model identifier forwarded to ``completion``.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        the reply carries no text content.
    """
    compressed_image = compress_image(image)
    img_str = base64.b64encode(compressed_image).decode()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Carefully observe this image and describe it in as much detail as possible. Please address the following aspects: primary subject matter, background setting, color palette, emotional conveyance, and specific details."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{img_str}"
                    },
                },
            ],
        }
    ]
    response = completion(model=model, messages=messages, max_tokens=1024)
    # The message content may be None (e.g. an empty or filtered reply);
    # return an empty string instead of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()
def translate_to_chinese(text, api_key, model):
    """Translate English *text* into Traditional Chinese with a chat model.

    When ``model`` contains ``"groq/"`` the translation is sent to
    ``"groq/gemma2-9b-it"`` instead; otherwise ``model`` itself is used.

    Args:
        text: English text to translate.
        api_key: Currently unused by this function; the request is issued
            without an explicit key.
            NOTE(review): presumably the key is picked up from the environment
            - confirm this is intended.
        model: Model identifier selected by the caller.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        the reply carries no text content.
    """
    if "groq/" in model:
        translation_model = "groq/gemma2-9b-it"
    else:
        translation_model = model
    messages = [
        {
            "role": "system",
            "content": "You are an expert translator proficient in both Traditional Chinese and English, with 40 years of translation experience and extensive cross-disciplinary knowledge. You have been deeply involved in the Chinese translations of The New York Times and Bloomberg, and have a deep understanding of the translation of current events and academic papers. I would like you to translate the following English text into Traditional Chinese, with a style similar to the Chinese versions of the aforementioned magazines. I would like to request a translation of the following English content into Traditional Chinese. Please ensure that the translation is accurate and natural-sounding."
        },
        {
            "role": "user",
            "content": f"THAT'S IMPORTANT OTHERWISE I'LL DIE. Translate the Text ``` {text} ``` into \"Traditional Chinese\". Must reply to me in Traditional Chinese."
        },
    ]
    response = completion(model=translation_model, messages=messages, max_tokens=1024)
    # The message content may be None (e.g. an empty or filtered reply);
    # return an empty string instead of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()
def resize_image(image, target_height=400):
    """Return a copy of *image* scaled to ``target_height``, keeping aspect ratio.

    Args:
        image: Image object exposing ``size`` and ``resize``.
        target_height: Height of the result in pixels.

    Returns:
        The resized image produced by ``image.resize`` with LANCZOS resampling.
    """
    original_width, original_height = image.size
    aspect_ratio = original_width / original_height
    # Truncation yields 0 for very tall, narrow images, which resize()
    # rejects; keep the width at one pixel or more.
    target_width = max(1, int(target_height * aspect_ratio))
    return image.resize((target_width, target_height), Image.LANCZOS)
# Page-wide CSS theme plus a small DOM observer script, injected on every run.
# Raw string so the JavaScript regex escape (`\/`) is not parsed as a Python
# escape sequence.
# NOTE(review): Streamlit's markdown renderer is not expected to execute
# <script> tags; confirm whether the observer below has any effect.
_PAGE_STYLE = r"""
<style>
.stApp {
    background-image: linear-gradient(to bottom, #e6f3ff, #ffffff);
}
.stTitle, .stMarkdown, .stRadio, .stFileUploader, .stTextInput > label, p {
    color: black !important;
}
.stTitle h1 {
    color: black !important;
}
.stButton>button {
    background-color: #3498db;
    color: white;
}
.stTextInput>div>div>input {
    background-color: #ecf0f1;
    color: #2c3e50;
}
.custom-image-container {
    border: 2px solid #bdc3c7;
    border-radius: 10px;
    overflow: hidden;
}
.custom-image {
    width: 100%;
    height: 400px;
    object-fit: cover;
    border-radius: 10px;
}
.description-box {
    background-color: rgba(52, 152, 219, 0.1);
    border-left: 5px solid #3498db;
    padding: 12px;
    border-radius: 0 6px 6px 0;
    transition: all 0.3s ease;
    margin-bottom: 5px;
}
.description-box:hover {
    background-color: rgba(52, 152, 219, 0.2);
    box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
}
.description-box p {
    color: #2c3e50;
    font-size: 16px;
    line-height: 1.6;
    transition: all 0.3s ease;
}
.description-box:hover p {
    font-weight: bold;
}
.info-box {
    background-color: rgba(52, 152, 219, 0.1);
    border-left: 5px solid #3498db;
    padding: 10px;
    border-radius: 0 10px 10px 0;
    transition: all 0.3s ease;
    margin-bottom: 5px;
}
.info-box:hover {
    background-color: rgba(52, 152, 219, 0.2);
    box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
}
.info-box p {
    color: #2c3e50;
    font-size: 16px;
    line-height: 1.6;
    transition: all 0.3s ease;
    margin: 0;
}
.info-box:hover p {
    font-weight: bold;
}
.stTextInput > div > div > input {
    background-color: #ffffff;
    color: #2c3e50;
    border: 2px solid #3498db;
    border-radius: 5px;
    padding: 8px 12px;
}
.stButton > button {
    background-color: #3498db;
    color: white;
    border: none;
    border-radius: 5px;
    padding: 8px 16px;
    font-weight: bold;
    transition: all 0.3s ease;
}
.stButton > button:hover {
    background-color: #2980b9;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
[data-testid=stSidebar] {
    background-color: #f0f8ff;
    padding: 20px;
}
[data-testid=stSidebar] .stTitle h1 {
    color: #2c3e50 !important;
    font-size: 24px;
    margin-bottom: 20px;
}
.main-content {
    padding-left: 0 !important;
}
.stColumns {
    gap: 1rem !important;
}
.streamlit-expanderHeader {
    background-color: #3498db;
    color: white !important;
    border-radius: 5px;
    padding: 10px 15px;
    font-weight: bold;
    transition: all 0.3s ease;
}
.streamlit-expanderHeader:hover {
    background-color: #2980b9;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
.streamlit-expanderContent {
    border: 1px solid #3498db;
    border-radius: 0 0 5px 5px;
    padding: 10px;
}
</style>
<script>
const mutationObserver = new MutationObserver(function(mutations) {
    mutations.forEach(function(mutation) {
        if (mutation.type === 'childList') {
            const descriptionBoxes = document.querySelectorAll('.description-box');
            descriptionBoxes.forEach(box => {
                const paragraphs = box.querySelectorAll('p');
                paragraphs.forEach(p => {
                    p.textContent = p.textContent.replace(/^<strong>|<\/strong>$/g, '');
                });
            });
        }
    });
});
mutationObserver.observe(document.body, {
    childList: true,
    subtree: true
});
</script>
"""


def _render_sidebar():
    """Draw the sidebar and return ``(uploaded_files, language, model, api_key)``."""
    with st.sidebar:
        st.title("🖼️ 圖片分析")
        if 'uploaded_files' not in st.session_state:
            st.session_state.uploaded_files = []
        new_uploads = st.file_uploader("新增/刪除圖片", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
        current_files = {f.name: f for f in new_uploads} if new_uploads else {}
        # Keep previously tracked files that are still in the uploader (in
        # their existing order), then append files that are new by name.
        tracked = [f for f in st.session_state.uploaded_files if f.name in current_files]
        tracked_names = {f.name for f in tracked}
        tracked.extend(f for name, f in current_files.items() if name not in tracked_names)
        st.session_state.uploaded_files = tracked
        uploaded_files = st.session_state.uploaded_files

        with st.expander("詮釋圖片語言", expanded=False):
            # A real (hidden) label avoids Streamlit's empty-label warning.
            language = st.radio(
                "詮釋圖片語言",
                ["繁體中文", "English"],
                index=0,
                label_visibility="collapsed",
            )

        st.markdown("### 🤖 Model Settings")
        model_options = ["gpt-4o", "gemini-1.5-pro", "gpt-4o-mini", "custom"]
        selected_model = st.selectbox("Select Model", model_options)
        if selected_model == "custom":
            custom_model = st.text_input("Enter custom model name")
            # Fall back to a default vision model while the field is empty.
            model = custom_model if custom_model else "groq/llava-v1.5-7b-4096-preview"
        else:
            model = selected_model

        st.markdown("### 🔑 API Settings")
        api_key = st.text_input("API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""))
        api_base = st.text_input("API Base URL", value=os.getenv("OPENAI_API_BASE", ""))
        if st.button("Save API Settings"):
            # NOTE(review): os.environ is process-wide, so saved values are
            # visible to every session served by this process - confirm this
            # is acceptable for the deployment.
            os.environ["OPENAI_API_KEY"] = api_key
            os.environ["OPENAI_API_BASE"] = api_base
            st.success("API settings saved successfully")

        st.markdown(
            '<div class="info-box">'
            '<p>系統部署:江信宗<br>Vision Language Models</p>'
            '</div>',
            unsafe_allow_html=True,
        )
    return uploaded_files, language, model, api_key


def _speech_for(cache_key, description, language):
    """Return MP3 bytes narrating *description*, or ``None`` if synthesis fails."""
    audio_cache = st.session_state.audio_cache
    if cache_key not in audio_cache:
        try:
            tts = gTTS(text=description, lang='zh-tw' if language == "繁體中文" else 'en')
            # Synthesize into memory: no temp file to create, leak or delete.
            audio_buffer = io.BytesIO()
            tts.write_to_fp(audio_buffer)
        except Exception as e:  # UI boundary: a TTS failure must not hide the text.
            st.warning(f"語音生成失敗: {str(e)}")
            return None
        audio_cache[cache_key] = audio_buffer.getvalue()
    return audio_cache[cache_key]


def _render_image_card(uploaded_file, language, model, api_key):
    """Show one uploaded image with its (cached) description and narration."""
    try:
        image = Image.open(uploaded_file)
        resized_image = resize_image(image)
        buffered = io.BytesIO()
        resized_image.save(buffered, format="PNG")
    except Exception as e:  # UI boundary: report an unreadable upload, keep the page alive.
        st.error(f"處理圖片時發生錯誤: {str(e)}")
        return
    img_str = base64.b64encode(buffered.getvalue()).decode()
    # The file name is user-controlled and rendered as raw HTML: escape it.
    safe_name = html.escape(uploaded_file.name)
    st.markdown(
        '<div class="custom-image-container">'
        f'<img src="data:image/png;base64,{img_str}" class="custom-image">'
        '</div>'
        f'<p style="text-align: center; color: black;">{safe_name}</p>',
        unsafe_allow_html=True,
    )

    # Cache per (file, language, model) so that changing the language or the
    # model re-analyzes instead of reusing a description in the wrong language.
    cache_key = (uploaded_file.name, language, model)
    analyzed = st.session_state.analyzed_files
    if cache_key not in analyzed:
        with st.spinner("分析圖片及生成語音中..."):
            try:
                description = analyze_image(image, api_key, model)
                if language == "繁體中文":
                    with st.spinner("翻譯中..."):
                        description = translate_to_chinese(description, api_key, model)
                analyzed[cache_key] = description
                # NOTE(review): presumably a pause between consecutive model
                # calls - confirm it is still needed.
                time.sleep(1)
            except Exception as e:  # UI boundary: show the error, skip this image.
                st.error(f"處理圖片時發生錯誤: {str(e)}")
                return

    description = analyzed[cache_key]
    paragraphs = [p.strip() for p in description.split('\n') if p.strip()]
    if not paragraphs:
        st.warning("無法獲取圖片描述。")
        return
    # Model output is untrusted text rendered as raw HTML: escape it.
    formatted_description = ''.join(
        f'<p style="margin: 0;">{html.escape(p)}</p>' for p in paragraphs
    )
    st.markdown(f'<div class="description-box">{formatted_description}</div>', unsafe_allow_html=True)
    audio_bytes = _speech_for(cache_key, description, language)
    if audio_bytes:
        st.audio(audio_bytes, format="audio/mpeg")


def main():
    """Run the Streamlit page: sidebar settings, then a two-column image gallery.

    Each uploaded image is displayed, described by the selected model
    (translated to Traditional Chinese when that language is chosen) and
    narrated with text-to-speech. Descriptions and audio are cached in
    ``st.session_state`` and dropped when the image is removed.
    """
    st.set_page_config(
        layout="wide",
        page_title="AI-Powered Visual Storytelling",
        page_icon="🖼️",
        menu_items={
            'Get Help': None,
            'Report a bug': None,
            'About': '# 圖片AI辨識應用\n使用AI分析圖片內容之網頁程式。',
        },
    )
    st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

    uploaded_files, language, model, api_key = _render_sidebar()

    st.markdown('<div class="main-content">', unsafe_allow_html=True)
    st.title("🌄 AI-Powered Visual Storytelling")
    if api_key and uploaded_files:
        if 'analyzed_files' not in st.session_state:
            st.session_state.analyzed_files = {}
        if 'audio_cache' not in st.session_state:
            st.session_state.audio_cache = {}
        # Forget cached results for images that are no longer uploaded.
        live_names = {f.name for f in uploaded_files}
        for cache in (st.session_state.analyzed_files, st.session_state.audio_cache):
            for stale_key in [key for key in cache if key[0] not in live_names]:
                del cache[stale_key]
        # Lay the images out two per row.
        for row_start in range(0, len(uploaded_files), 2):
            columns = st.columns(2)
            row_files = uploaded_files[row_start:row_start + 2]
            for column, uploaded_file in zip(columns, row_files):
                with column:
                    _render_image_card(uploaded_file, language, model, api_key)
    elif uploaded_files:
        st.warning("請輸入有效的 API Key 以分析圖片。")
    st.markdown('</div>', unsafe_allow_html=True)
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()