Spaces:

youngtsai
/

NBLM

Sleeping

App Files Files Community

NBLM / app.py

youngtsai

password

e485414 2 months ago

raw

history blame

11 kB

	# -*- coding: utf-8 -*-

	from typing import Container
	from config.config import PASSWORD
	import gradio as gr
	import os
	import shutil
	import tempfile
	from google import genai
	from google.genai import types

	from initializer import initialize_clients, initialize_password

	# Initialize the Google Cloud Storage service and the GenAI client.
	GCS_SERVICE, GENAI_CLIENT = initialize_clients()
	GCS_CLIENT = GCS_SERVICE.client

	# Rebinds PASSWORD: this value replaces the one imported from config.config.
	PASSWORD = initialize_password()

	def toggle_visibility(toggle_value):
	    """Build a Gradio update that sets a component's ``visible`` flag.

	    Args:
	        toggle_value: Value forwarded unchanged as ``visible``.

	    Returns:
	        The object produced by ``gr.update(visible=toggle_value)``.
	    """
	    visibility_patch = gr.update(visible=toggle_value)
	    return visibility_patch

	def mock_question_answer(question, history):
	    """Answer ``question`` from a fixed table of canned replies (mock data).

	    The user turn and the assistant turn are added to ``history`` in place
	    as ``{"role": ..., "content": ...}`` dictionaries.

	    Args:
	        question: Question text used as the lookup key.
	        history: Mutable list of chat messages; it is extended in place.

	    Returns:
	        A ``(history, "")`` tuple: the same list object that was passed in
	        and an empty string.
	    """
	    fallback_reply = "抱歉，我無法回答這個問題。請嘗試其他問題！"
	    canned_replies = {
	        "文件的核心觀點是什麼？": "這份文件的核心觀點是關於人工智慧如何提升工作效率。",
	        "有哪些關鍵詞或數據？": "關鍵詞包括：人工智慧、工作效率、數據分析。",
	        "文件的摘要是什麼？": "這份文件討論了如何利用人工智慧工具，提升企業的運營效率和決策速度。",
	    }

	    if question in canned_replies:
	        reply = canned_replies[question]
	    else:
	        reply = fallback_reply

	    history.extend(
	        [
	            {"role": "user", "content": question},
	            {"role": "assistant", "content": reply},
	        ]
	    )
	    return history, ""

	def mock_summary():
	    """Return a fixed placeholder summary string (mock data)."""
	    placeholder_summary = "這份文件主要討論人工智慧在工作效率提升方面的應用，並提供了實際案例來說明其價值。"
	    return placeholder_summary

	def add_to_file_list(file, file_list):
	    """Copy an uploaded file into the temp directory and register it.

	    Args:
	        file: Uploaded file object exposing a ``name`` path, or a falsy
	            value when nothing was uploaded.
	        file_list: Mutable list of source paths/links; appended to in place.

	    Returns:
	        A ``(update, None)`` tuple: a ``gr.update`` whose ``choices`` are
	        the display names of every entry in ``file_list``, and ``None``.
	    """
	    if file:
	        source_path = file.name
	        # NOTE(review): files sharing a basename map to the same temp path,
	        # so a later upload overwrites an earlier one.
	        staged_path = os.path.join(
	            tempfile.gettempdir(), os.path.basename(source_path)
	        )
	        shutil.copy(source_path, staged_path)  # keep a copy in the temp directory
	        file_list.append(staged_path)

	    labels = []
	    for entry in file_list:
	        short_name = os.path.basename(entry)
	        # Entries with an empty basename are shown unchanged.
	        labels.append(short_name or entry)
	    return gr.update(choices=labels), None

	def add_youtube_to_list(youtube_link, file_list):
	    """Register a YouTube link in the shared source list.

	    Args:
	        youtube_link: Link text; ignored when falsy.
	        file_list: Mutable list of source paths/links; appended to in place.

	    Returns:
	        A ``(update, "")`` tuple: a ``gr.update`` whose ``choices`` are the
	        display names of every entry in ``file_list``, and an empty string.
	    """
	    if youtube_link:
	        file_list.append(youtube_link)

	    labels = []
	    for entry in file_list:
	        short_name = os.path.basename(entry)
	        # Entries with an empty basename are shown unchanged.
	        labels.append(short_name or entry)
	    return gr.update(choices=labels), ""

	def generate_transcript(youtube_link):
	    """Stream a timestamped transcript of a YouTube video from Gemini.

	    Args:
	        youtube_link: URI of the video; sent to the model as a ``video/*``
	            part.

	    Returns:
	        The concatenation of every non-empty text part received from the
	        stream.

	    Raises:
	        Exception: Any error raised while building or streaming the request
	            is printed and then re-raised unchanged.
	    """
	    print(f"\n開始生成 YouTube 逐字稿: {youtube_link}")
	    try:
	        print("初始化 Gemini 模型設定...")
	        video = types.Part.from_uri(
	            file_uri=youtube_link,
	            mime_type="video/*",
	        )

	        model = "gemini-2.0-flash-exp"
	        contents = [
	            types.Content(
	                role="user",
	                parts=[
	                    video,
	                    # Pass ``text`` by keyword: recent google-genai releases
	                    # reject the positional form, and the keyword form
	                    # works on older releases too.
	                    types.Part.from_text(
	                        text="""請給我帶時間軸的逐字稿，請統一用 zhTW語言"""
	                    ),
	                ],
	            )
	        ]
	        generate_content_config = types.GenerateContentConfig(
	            temperature=1,
	            top_p=0.95,
	            max_output_tokens=8192,
	            response_modalities=["TEXT"],
	            safety_settings=[
	                types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
	                types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"),
	                types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"),
	                types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
	            ],
	        )

	        print("開始串流生成逐字稿...")
	        pieces = []
	        for chunk in GENAI_CLIENT.models.generate_content_stream(
	            model=model,
	            contents=contents,
	            config=generate_content_config,
	        ):
	            # Keep only text parts. ``candidates``, ``content`` and ``parts``
	            # may each be missing or None on a chunk, so every level falls
	            # back to an empty sequence instead of raising.
	            for candidate in getattr(chunk, "candidates", None) or []:
	                content = getattr(candidate, "content", None)
	                for part in getattr(content, "parts", None) or []:
	                    text = getattr(part, "text", None)
	                    if text:
	                        pieces.append(text)
	                        print(".", end="", flush=True)

	        print("\n逐字稿生成完成！")
	        return "".join(pieces)
	    except Exception as e:
	        print(f"\n生成逐字稿時發生錯誤: {str(e)}")
	        raise

	def generate_summary(transcript):
	    """Generate a bullet-point summary of ``transcript`` using Gemini.

	    Args:
	        transcript: Transcript text interpolated into the prompt.

	    Returns:
	        The ``text`` attribute of the model response.

	    Raises:
	        Exception: Any error raised while building or sending the request
	            is printed and then re-raised unchanged.
	    """
	    try:
	        print("\n開始生成摘要...")
	        model = "gemini-2.0-flash-exp"
	        prompt = f"""請根據以下逐字稿生成重點摘要，以條列方式呈現主要觀點：

	{transcript}

	請以下列格式輸出：
	# 主要觀點：
	1. [重點1]
	2. [重點2]
	...

	# 結論：
	[整體結論]
	"""
	        contents = [
	            types.Content(
	                role="user",
	                # Pass ``text`` by keyword: recent google-genai releases
	                # reject the positional form, and the keyword form works
	                # on older releases too.
	                parts=[types.Part.from_text(text=prompt)],
	            )
	        ]

	        response = GENAI_CLIENT.models.generate_content(
	            model=model,
	            contents=contents,
	        )

	        print("摘要生成完成！")
	        return response.text
	    except Exception as e:
	        print(f"\n生成摘要時發生錯誤: {str(e)}")
	        raise

	def process_all_files(file_list):
	    """Process every queued source and collect status lines and transcripts.

	    Entries containing ``youtube.com`` or ``youtu.be`` are sent to
	    ``generate_transcript``; every other entry is only reported as
	    processed (no real processing is implemented for it yet).

	    Args:
	        file_list: Iterable-with-length of path or link strings.

	    Returns:
	        A ``(result_text, transcript_text)`` tuple: one status line per
	        entry, and the transcripts of the YouTube entries that succeeded.
	    """
	    print("\n=== 開始處理檔案 ===")
	    print(f"待處理檔案數量: {len(file_list)}")

	    status_lines = []
	    transcript_sections = []
	    youtube_markers = ("youtube.com", "youtu.be")

	    for position, source in enumerate(file_list, start=1):
	        print(f"\n處理第 {position}/{len(file_list)} 個檔案: {source}")

	        is_youtube = any(marker in source for marker in youtube_markers)
	        if not is_youtube:
	            print(f"處理一般檔案: {source}")
	            try:
	                # Placeholder: handling for other file types can go here.
	                print("✓ 檔案處理成功")
	                status_lines.append(f"🟢 檔案處理完成: {source}\n")
	            except Exception as e:
	                print(f"✗ 檔案處理失敗: {str(e)}")
	                status_lines.append(f"🔴 檔案處理失敗: {source}\n")
	            continue

	        print("檢測到 YouTube 連結，開始生成逐字稿...")
	        try:
	            transcript = generate_transcript(source)
	            print("✓ YouTube 逐字稿生成成功")
	            status_lines.append(f"🟢 YouTube 影片處理完成: {source}\n")
	            transcript_sections.append(
	                f"\n=== {source} 的逐字稿 ===\n{transcript}\n"
	            )
	        except Exception as e:
	            # A failed video is reported in the status text; the loop goes on.
	            print(f"✗ YouTube 逐字稿生成失敗: {str(e)}")
	            status_lines.append(f"🔴 YouTube 影片處理失敗: {source}\n")

	    print("\n=== 檔案處理完成 ===")
	    return "".join(status_lines), "".join(transcript_sections)

	def process_with_auth(password, file_list):
	    """Process the queued sources only when the correct password is given.

	    Args:
	        password: Password entered by the user; empty or ``None`` is
	            rejected without further comparison.
	        file_list: Sources forwarded to ``process_all_files``.

	    Returns:
	        A ``(status_text, transcript_text)`` tuple. When the password is
	        missing or wrong, the status is an error message and the
	        transcript is an empty string.
	    """
	    if not password or password != PASSWORD:
	        # Return exactly two values, like the success path: the click
	        # handler binds this function to two output components, so a third
	        # value makes Gradio raise instead of displaying the message.
	        return "請輸入正確的密碼", ""

	    return process_all_files(file_list)

	def on_summary_click(transcript):
	    """Summarise ``transcript``, or return a hint when there is none.

	    Args:
	        transcript: Transcript text; a falsy value skips summarisation.

	    Returns:
	        The result of ``generate_summary(transcript)``, or a fixed message
	        asking the user to process a source first.
	    """
	    if transcript:
	        return generate_summary(transcript)
	    return "請先上傳文件或輸入 YouTube 連結並處理完成後再生成摘要。"

	# Gradio UI: a header row with the password field, three visibility
	# toggles, and three columns (sources, chat, feature cards).
	with gr.Blocks() as demo:

	    with gr.Row():
	        gr.Markdown("# AI Notes Assistant")
	        # Mask the input so the password is not shown in clear text.
	        password_input = gr.Textbox(label="password", type="password")

	    with gr.Row():
	        source_toggle = gr.Checkbox(label="顯示來源選單", value=True)
	        chat_toggle = gr.Checkbox(label="顯示對話區域", value=True)
	        feature_toggle = gr.Checkbox(label="顯示功能卡片", value=True)

	    with gr.Row():
	        with gr.Column(visible=True) as source_column:
	            gr.Markdown("### 來源選單")

	            # Shared list of source paths/links; the add-* handlers mutate it.
	            file_list = gr.State([])

	            with gr.Tab("YouTube 連結"):
	                youtube_link = gr.Textbox(label="輸入 YouTube 連結")
	                add_youtube_button = gr.Button("添加到來源列表")
	                add_youtube_button.click(add_youtube_to_list, inputs=[youtube_link, file_list], outputs=[file_list, youtube_link])

	            with gr.Tab("上傳檔案"):
	                upload_file = gr.File(label="從電腦添加文件", file_types=[".txt", ".pdf", ".docx"])
	                add_file_button = gr.Button("添加到來源列表")
	                add_file_button.click(add_to_file_list, inputs=[upload_file, file_list], outputs=[file_list, upload_file])

	            file_display = gr.CheckboxGroup(label="已上傳的文件", interactive=True)

	            process_files_button = gr.Button("處理檔案")
	            rag_result = gr.Textbox(label="處理狀態", interactive=False)

	            # Refresh the checkbox choices whenever the source list changes.
	            file_list.change(lambda x: gr.update(choices = [os.path.basename(path) if os.path.basename(path) else path for path in x]), inputs=file_list, outputs=file_display)

	        with gr.Column(visible=True) as chat_column:
	            gr.Markdown("### 對話區域")
	            chatbot = gr.Chatbot(label="聊天記錄", type="messages")
	            question = gr.Textbox(label="輸入問題，例如：文件的核心觀點是什麼？")
	            ask_button = gr.Button("提問")

	        with gr.Column(visible=True) as feature_column:
	            gr.Markdown("### 功能卡片")
	            with gr.Tab("摘要生成"):
	                summary_button = gr.Button("生成摘要", visible=False)
	                summary_output = gr.Markdown(
	                    label="摘要",
	                    show_label=True,
	                    show_copy_button=True,
	                    container=True
	                )
	            with gr.Tab("逐字稿"):
	                transcript_display = gr.Textbox(
	                    label="YouTube 逐字稿",
	                    interactive=False,
	                    lines=10,
	                    show_copy_button=True,
	                    placeholder="處理 YouTube 影片後，逐字稿將顯示在這裡..."
	                )
	            with gr.Tab("其他功能"):
	                gr.Markdown("此處可以添加更多功能卡片")

	    source_toggle.change(toggle_visibility, inputs=source_toggle, outputs=source_column)
	    chat_toggle.change(toggle_visibility, inputs=chat_toggle, outputs=chat_column)
	    feature_toggle.change(toggle_visibility, inputs=feature_toggle, outputs=feature_column)

	    # Process-files button: run the password-checked processing step, then
	    # feed the transcript box into the summary step.
	    process_files_button.click(
	        fn=process_with_auth,
	        inputs=[password_input, file_list],
	        outputs=[
	            rag_result,
	            transcript_display
	        ]
	    ).then(
	        fn=on_summary_click,
	        inputs=[transcript_display],
	        outputs=[summary_output]
	    )

	    history = gr.State([])
	    ask_button.click(mock_question_answer, inputs=[question, history], outputs=[chatbot, question])
	    summary_button.click(
	        fn=on_summary_click,
	        inputs=[transcript_display],
	        outputs=[summary_output]
	    )



	demo.launch(share=True)