Spaces:
Sleeping
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from docx import Document
import os
from openai import OpenAI
from groq import Groq
import uuid
from gtts import gTTS
import math
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
import yt_dlp
from moviepy.editor import VideoFileClip
from pytube import YouTube
import os
import io
import time
import json
from urllib.parse import urlparse, parse_qs
from google.cloud import storage
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaIoBaseUpload
from educational_material import EducationalMaterial
from storage_service import GoogleCloudStorage
import boto3
from chatbot import Chatbot
# Configuration comes from local_config.json when IS_ENV_LOCAL == "true",
# and from process environment variables otherwise.
is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
print(f"is_env_local: {is_env_local}")
print("===gr__version__===")
print(gr.__version__)

if not is_env_local:
    # Non-local run: read secrets from the environment.
    PASSWORD = os.getenv("PASSWORD")
    # The same service-account JSON is used for both GCS and Drive.
    GCS_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    DRIVE_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    JUTOR_CHAT_KEY = os.getenv("JUTOR_CHAT_KEY")
    AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
    AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
    # Region and output path are fixed values in this mode.
    AWS_REGION_NAME = 'us-west-2'
    OUTPUT_PATH = 'videos'
else:
    # Local run: everything comes from one JSON file.
    with open("local_config.json") as f:
        config = json.load(f)
    PASSWORD = config["PASSWORD"]
    # The credentials are stored as a JSON object in the config file and
    # re-serialised to a string, matching what the env-var branch yields.
    GCS_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
    DRIVE_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
    OPEN_AI_KEY = config["OPEN_AI_KEY"]
    GROQ_API_KEY = config["GROQ_API_KEY"]
    JUTOR_CHAT_KEY = config["JUTOR_CHAT_KEY"]
    AWS_ACCESS_KEY = config["AWS_ACCESS_KEY"]
    AWS_SECRET_KEY = config["AWS_SECRET_KEY"]
    AWS_REGION_NAME = config["AWS_REGION_NAME"]
    OUTPUT_PATH = config["OUTPUT_PATH"]

# Module-level mutable state (presumably updated by handlers elsewhere in
# the file -- not visible from here).
TRANSCRIPTS = []
CURRENT_INDEX = 0
VIDEO_ID = ""

# Service clients, created once at import time.
OPEN_AI_CLIENT = OpenAI(api_key=OPEN_AI_KEY)
GROQ_CLIENT = Groq(api_key=GROQ_API_KEY)
GCS_SERVICE = GoogleCloudStorage(GCS_KEY)
GCS_CLIENT = GCS_SERVICE.client
BEDROCK_CLIENT = boto3.client(
    service_name="bedrock-runtime",
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
    region_name=AWS_REGION_NAME,
)
# Password verification
def verify_password(password):
    """Compare the supplied password with the configured PASSWORD.

    Returns:
        True when the two values are equal.

    Raises:
        gr.Error: when they differ (the message text means "wrong password").
    """
    if password != PASSWORD:
        raise gr.Error("密碼錯誤")
    return True
# ==== GCS helpers ====
def gcs_check_file_exists(gcs_client, bucket_name, file_name):
    """Report whether the named blob exists in a GCS bucket.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: name of the bucket to look in.
        file_name: blob path, formatted as ``{folder_name}/{file_name}``.

    Returns:
        The result of ``blob.exists()`` for that blob.
    """
    return gcs_client.bucket(bucket_name).blob(file_name).exists()
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
    """Upload a local file to a GCS bucket.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: destination bucket.
        destination_blob_name: blob path to write inside the bucket.
        file_path: path of the local file to upload.
    """
    target = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(file_path)
    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
    """Upload a string as the content of a blob in a GCS bucket.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: destination bucket.
        destination_blob_name: blob path to write inside the bucket.
        json_string: the text to store; it is passed to ``upload_from_string``
            unchanged.
    """
    target = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_string(json_string)
    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
    """Download a blob from GCS and return its content as text.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: bucket holding the blob.
        source_blob_name: blob path to read.

    Returns:
        The result of ``blob.download_as_text()``.
    """
    source = gcs_client.bucket(bucket_name).blob(source_blob_name)
    return source.download_as_text()
def make_blob_public(gcs_client, bucket_name, blob_name):
    """Make a GCS object publicly readable and print its public URL.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: bucket holding the object.
        blob_name: path of the object to publish.
    """
    blob = gcs_client.bucket(bucket_name).blob(blob_name)
    blob.make_public()
    print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")
def get_blob_public_url(gcs_client, bucket_name, blob_name):
    """Return the ``public_url`` attribute of a GCS object.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: bucket holding the object.
        blob_name: path of the object.
    """
    return gcs_client.bucket(bucket_name).blob(blob_name).public_url
def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
    """Upload an image to GCS, make it public, and return its public URL.

    The work is done in three steps by the sibling helpers
    ``upload_file_to_gcs``, ``make_blob_public`` and ``get_blob_public_url``.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: destination bucket.
        file_name: blob path to write inside the bucket.
        file_path: path of the local image file.

    Returns:
        The public URL of the uploaded object.
    """
    # Step 1: push the local file into the bucket.
    upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)

    # Step 2: make the new object publicly readable.
    make_blob_public(gcs_client, bucket_name, file_name)

    # Step 3: look up the URL and hand it back.
    public_url = get_blob_public_url(gcs_client, bucket_name, file_name)
    print(f"Public URL for the uploaded image: {public_url}")
    return public_url
def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
    """Copy every non-trashed file in a Drive folder into a GCS folder.

    The Drive ``files().list`` call returns results one page at a time, so
    this follows ``nextPageToken`` until the listing is exhausted; a single
    call would only copy the first page of files.

    Args:
        drive_service: Drive API client exposing ``files().list(...)``.
        gcs_client: GCS client handed on to ``copy_file_from_drive_to_gcs``.
        drive_folder_id: ID of the Drive folder whose children are copied.
        bucket_name: destination GCS bucket.
        gcs_folder_name: prefix; each file is written to
            ``{gcs_folder_name}/{file name}``.
    """
    query = f"'{drive_folder_id}' in parents and trashed = false"
    page_token = None
    while True:
        # The first request carries pageToken=None; the Google API client
        # drops None-valued parameters before sending.
        response = drive_service.files().list(q=query, pageToken=page_token).execute()
        for entry in response.get('files', []):
            gcs_destination_path = f"{gcs_folder_name}/{entry['name']}"
            copy_file_from_drive_to_gcs(
                drive_service, gcs_client, entry['id'], bucket_name, gcs_destination_path
            )
        page_token = response.get('nextPageToken')
        if not page_token:
            break
def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
    """Copy one Drive file into GCS, buffering its content in memory.

    Args:
        drive_service: Drive API client exposing ``files().get_media(...)``.
        gcs_client: client object exposing ``bucket(name)``.
        file_id: ID of the Drive file to copy.
        bucket_name: destination GCS bucket.
        gcs_destination_path: blob path to write inside the bucket.
    """
    # Pull the Drive file into an in-memory buffer, chunk by chunk.
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, drive_service.files().get_media(fileId=file_id))
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()

    # getvalue() returns the whole buffer regardless of the stream position.
    payload = buffer.getvalue()

    # Write the bytes to the destination blob.
    gcs_client.bucket(bucket_name).blob(gcs_destination_path).upload_from_string(payload)
    print(f"File {file_id} copied to GCS at {gcs_destination_path}.")
def delete_blob(gcs_client, bucket_name, blob_name):
    """Delete an object from a GCS bucket.

    Args:
        gcs_client: client object exposing ``bucket(name)``.
        bucket_name: bucket holding the object.
        blob_name: path of the object to delete.
    """
    gcs_client.bucket(bucket_name).blob(blob_name).delete()
    print(f"Blob {blob_name} deleted from GCS.")
# ==== Drive: initialisation ====
def init_drive_service():
    """Build a Google Drive v3 API client from the module-level DRIVE_KEY.

    DRIVE_KEY is parsed as a JSON string holding service-account info, and
    the credentials are scoped to ``https://www.googleapis.com/auth/drive``.

    Returns:
        The object returned by ``build('drive', 'v3', ...)``.
    """
    account_info = json.loads(DRIVE_KEY)
    drive_scopes = ['https://www.googleapis.com/auth/drive']
    creds = service_account.Credentials.from_service_account_info(
        account_info, scopes=drive_scopes)
    return build('drive', 'v3', credentials=creds)
def create_folder_if_not_exists(service, folder_name, parent_id):
    """Return the ID of a named Drive folder under a parent, creating it if absent.

    The folder name is embedded in a Drive search query inside single
    quotes, so backslashes and single quotes in the name are escaped first;
    unescaped, a name such as ``Bob's`` produces a malformed query.

    Args:
        service: Drive API client exposing ``files().list`` and
            ``files().create``.
        folder_name: display name of the folder to find or create.
        parent_id: ID of the parent folder.

    Returns:
        The ID of the first matching folder, or of the newly created one.
    """
    print("检查是否存在特定名称的文件夹,如果不存在则创建")
    # Escape the backslash first so the quote escapes are not doubled.
    escaped_name = str(folder_name).replace("\\", "\\\\").replace("'", "\\'")
    query = (
        "mimeType='application/vnd.google-apps.folder' "
        f"and name='{escaped_name}' and '{parent_id}' in parents and trashed=false"
    )
    response = service.files().list(q=query, spaces='drive', fields="files(id, name)").execute()
    folders = response.get('files', [])
    if folders:
        # The folder already exists: reuse the first match.
        return folders[0]['id']

    # No match: create it. The metadata carries the raw, unescaped name.
    file_metadata = {
        'name': folder_name,
        'mimeType': 'application/vnd.google-apps.folder',
        'parents': [parent_id],
    }
    folder = service.files().create(body=file_metadata, fields='id').execute()
    return folder.get('id')
# Check whether a file exists on Google Drive
def check_file_exists(service, folder_name, file_name):
    """Look for a non-trashed file with the given name under a Drive parent.

    The file name is embedded in a Drive search query inside single quotes,
    so backslashes and single quotes in it are escaped first; unescaped, a
    name containing ``'`` produces a malformed query.

    Args:
        service: Drive API client exposing ``files().list``.
        folder_name: despite the name, this value is placed in the
            ``'<value>' in parents`` clause of the query, i.e. it is used as
            the parent folder's ID.
        file_name: display name of the file to look for.

    Returns:
        ``(True, file_id)`` for the first match, or ``(False, None)`` when
        nothing matches.
    """
    # Escape the backslash first so the quote escapes are not doubled.
    escaped_name = str(file_name).replace("\\", "\\\\").replace("'", "\\'")
    query = f"name = '{escaped_name}' and '{folder_name}' in parents and trashed = false"
    response = service.files().list(q=query).execute()
    files = response.get('files', [])
    if files:
        return True, files[0]['id']
    return False, None
def upload_content_directly(service, file_name, folder_id, content):
    """Upload text content as a new plain-text file on Google Drive.

    Args:
        service: Drive API client exposing ``files().create``.
        file_name: name of the file to create; must be truthy.
        folder_id: ID of the parent folder; must be truthy.
        content: text to upload, encoded as UTF-8. An empty string is
            accepted, ``None`` is not.

    Returns:
        The ``id`` field of the created file.

    Raises:
        ValueError: when ``file_name`` or ``folder_id`` is empty, or
            ``content`` is ``None``.
        Exception: any error raised during the upload is printed and then
            re-raised unchanged.
    """
    required = (
        (file_name, "文件名不能为空"),
        (folder_id, "文件夹ID不能为空"),
    )
    for value, message in required:
        if not value:
            raise ValueError(message)
    if content is None:  # an empty string may be uploaded, None may not
        raise ValueError("内容不能为空")

    metadata = {'name': file_name, 'parents': [folder_id]}
    try:
        # Wrap the encoded text in an in-memory stream; the upload has to
        # run while the stream is still open.
        with io.BytesIO(content.encode('utf-8')) as stream:
            media = MediaIoBaseUpload(stream, mimetype='text/plain', resumable=True)
            print("==content==")
            print(content)
            print("==content==")
            print("==media==")
            print(media)
            print("==media==")
            created = service.files().create(body=metadata, media_body=media, fields='id').execute()
            return created.get('id')
    except Exception as e:
        print(f"上传文件时发生错误: {e}")
        raise  # let the caller decide how to handle it
def upload_file_directly(service, file_name, folder_id, file_path):
    """Upload a local file to Google Drive with the ``application/json`` MIME type.

    Args:
        service: Drive API client exposing ``files().create``.
        file_name: name to give the file on Drive.
        folder_id: ID of the parent folder.
        file_path: path of the local file to upload.

    Returns:
        Always ``True``; the created file's ID is not returned.
    """
    metadata = {'name': file_name, 'parents': [folder_id]}
    json_media = MediaFileUpload(file_path, mimetype='application/json')
    service.files().create(body=metadata, media_body=json_media, fields='id').execute()
    return True
def upload_img_directly(service, file_name, folder_id, file_path):
    """Upload a local file to Google Drive with the ``image/jpeg`` MIME type.

    Args:
        service: Drive API client exposing ``files().create``.
        file_name: name to give the file on Drive.
        folder_id: ID of the parent folder.
        file_path: path of the local image file.

    Returns:
        The ``id`` field of the created file.
    """
    metadata = {'name': file_name, 'parents': [folder_id]}
    jpeg_media = MediaFileUpload(file_path, mimetype='image/jpeg')
    created = service.files().create(body=metadata, media_body=jpeg_media, fields='id').execute()
    return created.get('id')
def download_file_as_string(service, file_id):
"""
从Google Drive下载文件并将其作为字符串返回。
"""
request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
@@ -1139,8 +1139,16 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
         "response_format": response_format
     }
 
-
-
+    try:
+        response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
+        key_moments = json.loads(response.choices[0].message.content)["key_moments"]
+    except Exception as e:
+        error_msg = f" {video_id} 關鍵時刻錯誤: {str(e)}"
+        print("===generate_key_moments error===")
+        print(error_msg)
+        print("===generate_key_moments error===")
+        key_moments = []
+
     print("=====key_moments=====")
     print(key_moments)
     print("=====key_moments=====")