Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,6 +20,9 @@ from pathlib import Path
|
|
| 20 |
from threading import Thread
|
| 21 |
from dotenv import load_dotenv
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
# Edge TTS imports
|
| 24 |
import edge_tts
|
| 25 |
from pydub import AudioSegment
|
|
@@ -198,6 +201,28 @@ class UnifiedAudioConverter:
|
|
| 198 |
except httpx.HTTPError as e:
|
| 199 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
def _get_messages_formatter_type(self, model_name):
|
| 202 |
"""Get appropriate message formatter for the model"""
|
| 203 |
if "Mistral" in model_name or "BitSix" in model_name:
|
|
@@ -431,8 +456,9 @@ class UnifiedAudioConverter:
|
|
| 431 |
# 언어별 음성 설정 — language-specific voice settings
|
| 432 |
if language == "Korean":
|
| 433 |
voices = [
|
| 434 |
-
"ko-KR-
|
| 435 |
-
"ko-KR-
|
|
|
|
| 436 |
]
|
| 437 |
else:
|
| 438 |
voices = [
|
|
@@ -489,7 +515,7 @@ class UnifiedAudioConverter:
|
|
| 489 |
# Create different voice characteristics for different speakers
|
| 490 |
if language == "Korean":
|
| 491 |
voice_configs = [
|
| 492 |
-
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "
|
| 493 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
| 494 |
]
|
| 495 |
else:
|
|
@@ -643,14 +669,18 @@ class UnifiedAudioConverter:
|
|
| 643 |
converter = UnifiedAudioConverter(ConversationConfig())
|
| 644 |
|
| 645 |
|
| 646 |
-
async def synthesize(
|
| 647 |
-
"""Main synthesis function -
|
| 648 |
-
if not article_url:
|
| 649 |
-
return "Please provide a valid URL.", None
|
| 650 |
-
|
| 651 |
try:
|
| 652 |
-
#
|
| 653 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
|
| 655 |
# Limit text to max words
|
| 656 |
words = text.split()
|
|
@@ -734,9 +764,9 @@ async def regenerate_audio(conversation_text: str, tts_engine: str = "Edge-TTS",
|
|
| 734 |
return f"Error generating audio: {str(e)}", None
|
| 735 |
|
| 736 |
|
| 737 |
-
def synthesize_sync(
|
| 738 |
"""Synchronous wrapper for async synthesis"""
|
| 739 |
-
return asyncio.run(synthesize(
|
| 740 |
|
| 741 |
|
| 742 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
|
@@ -764,6 +794,14 @@ def update_tts_engine_for_korean(language):
|
|
| 764 |
)
|
| 765 |
|
| 766 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
# 모델 초기화 (앱 시작 시) — initialize the model at app startup
|
| 768 |
if LLAMA_CPP_AVAILABLE:
|
| 769 |
try:
|
|
@@ -778,9 +816,9 @@ if LLAMA_CPP_AVAILABLE:
|
|
| 778 |
|
| 779 |
|
| 780 |
# Gradio Interface
|
| 781 |
-
with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
| 782 |
-
gr.Markdown("# ๐๏ธ URL to Podcast Converter")
|
| 783 |
-
gr.Markdown("Convert any article, blog, or
|
| 784 |
|
| 785 |
# 상단에 로컬 LLM 상태 표시 — show the local LLM status at the top
|
| 786 |
with gr.Row():
|
|
@@ -793,11 +831,29 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
| 793 |
|
| 794 |
with gr.Row():
|
| 795 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
url_input = gr.Textbox(
|
| 797 |
label="Article URL",
|
| 798 |
placeholder="Enter the article URL here...",
|
| 799 |
-
value=""
|
|
|
|
| 800 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 801 |
with gr.Column(scale=1):
|
| 802 |
# 언어 선택 추가 — add language selection
|
| 803 |
language_selector = gr.Radio(
|
|
@@ -871,16 +927,23 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
| 871 |
|
| 872 |
gr.Examples(
|
| 873 |
examples=[
|
| 874 |
-
["https://huggingface.co/blog/openfree/cycle-navigator", "Local", "Edge-TTS", "English"],
|
| 875 |
-
["https://www.bbc.com/news/technology-67988517", "Local", "Spark-TTS", "English"],
|
| 876 |
-
["https://huggingface.co/papers/2505.14810", "Local", "Edge-TTS", "Korean"],
|
| 877 |
],
|
| 878 |
-
inputs=[url_input, mode_selector, tts_selector, language_selector],
|
| 879 |
outputs=[conversation_output, status_output],
|
| 880 |
fn=synthesize_sync,
|
| 881 |
cache_examples=False,
|
| 882 |
)
|
| 883 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 884 |
# 언어 변경 시 TTS 엔진 옵션 업데이트 — update the TTS engine options when the language changes
|
| 885 |
language_selector.change(
|
| 886 |
fn=update_tts_engine_for_korean,
|
|
@@ -888,10 +951,19 @@ with gr.Blocks(theme='soft', title="URL to Podcast Converter") as demo:
|
|
| 888 |
outputs=[tts_selector]
|
| 889 |
)
|
| 890 |
|
| 891 |
-
# 이벤트 연결 — event wiring
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 892 |
convert_btn.click(
|
| 893 |
-
fn=synthesize_sync
|
| 894 |
-
|
|
|
|
|
|
|
| 895 |
outputs=[conversation_output, status_output]
|
| 896 |
)
|
| 897 |
|
|
|
|
| 20 |
from threading import Thread
|
| 21 |
from dotenv import load_dotenv
|
| 22 |
|
| 23 |
+
# PDF processing imports
|
| 24 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 25 |
+
|
| 26 |
# Edge TTS imports
|
| 27 |
import edge_tts
|
| 28 |
from pydub import AudioSegment
|
|
|
|
| 201 |
except httpx.HTTPError as e:
|
| 202 |
raise RuntimeError(f"Failed to fetch URL: {e}")
|
| 203 |
|
| 204 |
+
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text content of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) to a
            PDF, or a binary file-like object exposing ``read()``. A path is
            loaded in place; a file-like object is first spooled to a
            temporary ``.pdf`` file because the loader takes a path.

    Returns:
        The ``page_content`` of every loaded page, joined with newlines.

    Raises:
        RuntimeError: If the input cannot be read or the PDF cannot be
            loaded. The underlying exception is attached as ``__cause__``.
    """
    tmp_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            # Already on disk: no temporary copy is needed.
            pdf_path = os.fspath(pdf_file)
        else:
            # Save the uploaded stream to a temporary file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the path before writing so a failing read()/write()
                # still gets cleaned up in the ``finally`` below.
                tmp_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = tmp_path

        # Load the PDF and combine the text of all pages.
        pages = PyPDFLoader(pdf_path).load()
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Delete the temporary file on success *and* on failure.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: never mask the real result or error.
                pass
|
| 225 |
+
|
| 226 |
def _get_messages_formatter_type(self, model_name):
|
| 227 |
"""Get appropriate message formatter for the model"""
|
| 228 |
if "Mistral" in model_name or "BitSix" in model_name:
|
|
|
|
| 456 |
# 언어별 음성 설정 — language-specific voice settings
|
| 457 |
if language == "Korean":
|
| 458 |
voices = [
|
| 459 |
+
"ko-KR-HyunsuNeural", # ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
| 460 |
+
"ko-KR-InJoonNeural" # ๋จ๋จ์ฑ ์์ฑ (์์ฐ์ค๋ฌ์ด ํ๊ตญ์ด)
|
| 461 |
+
|
| 462 |
]
|
| 463 |
else:
|
| 464 |
voices = [
|
|
|
|
| 515 |
# Create different voice characteristics for different speakers
|
| 516 |
if language == "Korean":
|
| 517 |
voice_configs = [
|
| 518 |
+
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ํ์บ์คํธ ์งํ์ ๋งก์ ์งํ์์
๋๋ค.", "gender": "male"},
|
| 519 |
{"prompt_text": "์๋
ํ์ธ์, ์ค๋ ๊ฒ์คํธ๋ก ์ฐธ์ฌํ๊ฒ ๋์ด ๊ธฐ์ฉ๋๋ค.", "gender": "male"}
|
| 520 |
]
|
| 521 |
else:
|
|
|
|
| 669 |
converter = UnifiedAudioConverter(ConversationConfig())
|
| 670 |
|
| 671 |
|
| 672 |
+
async def synthesize(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
|
| 673 |
+
"""Main synthesis function - handles both URL and PDF inputs"""
|
|
|
|
|
|
|
|
|
|
| 674 |
try:
|
| 675 |
+
# Extract text based on input type
|
| 676 |
+
if input_type == "URL":
|
| 677 |
+
if not article_input or not isinstance(article_input, str):
|
| 678 |
+
return "Please provide a valid URL.", None
|
| 679 |
+
text = converter.fetch_text(article_input)
|
| 680 |
+
else: # PDF
|
| 681 |
+
if not article_input:
|
| 682 |
+
return "Please upload a PDF file.", None
|
| 683 |
+
text = converter.extract_text_from_pdf(article_input)
|
| 684 |
|
| 685 |
# Limit text to max words
|
| 686 |
words = text.split()
|
|
|
|
| 764 |
return f"Error generating audio: {str(e)}", None
|
| 765 |
|
| 766 |
|
| 767 |
+
def synthesize_sync(article_input, input_type: str = "URL", mode: str = "Local", tts_engine: str = "Edge-TTS", language: str = "English"):
    """Blocking entry point for the async ``synthesize`` coroutine.

    Forwards all arguments positionally to ``synthesize``, runs the
    resulting coroutine to completion with ``asyncio.run`` and returns
    whatever it returns.
    """
    pending = synthesize(article_input, input_type, mode, tts_engine, language)
    return asyncio.run(pending)
|
| 770 |
|
| 771 |
|
| 772 |
def regenerate_audio_sync(conversation_text: str, tts_engine: str = "Edge-TTS", language: str = "English"):
|
|
|
|
| 794 |
)
|
| 795 |
|
| 796 |
|
| 797 |
+
def toggle_input_visibility(input_type):
    """Build the visibility updates for the URL input and the file upload.

    Returns a pair of ``gr.update`` objects: the first is visible only when
    ``input_type`` is ``"URL"``, the second only when it is anything else
    (i.e. PDF).
    """
    show_url = input_type == "URL"
    return gr.update(visible=show_url), gr.update(visible=not show_url)
|
| 803 |
+
|
| 804 |
+
|
| 805 |
# 모델 초기화 (앱 시작 시) — initialize the model at app startup
|
| 806 |
if LLAMA_CPP_AVAILABLE:
|
| 807 |
try:
|
|
|
|
| 816 |
|
| 817 |
|
| 818 |
# Gradio Interface
|
| 819 |
+
with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
|
| 820 |
+
gr.Markdown("# ๐๏ธ URL/PDF to Podcast Converter")
|
| 821 |
+
gr.Markdown("Convert any article, blog, news, or PDF document into an engaging podcast conversation!")
|
| 822 |
|
| 823 |
# 상단에 로컬 LLM 상태 표시 — show the local LLM status at the top
|
| 824 |
with gr.Row():
|
|
|
|
| 831 |
|
| 832 |
with gr.Row():
|
| 833 |
with gr.Column(scale=3):
|
| 834 |
+
# Input type selector
|
| 835 |
+
input_type_selector = gr.Radio(
|
| 836 |
+
choices=["URL", "PDF"],
|
| 837 |
+
value="URL",
|
| 838 |
+
label="Input Type",
|
| 839 |
+
info="Choose between URL or PDF file upload"
|
| 840 |
+
)
|
| 841 |
+
|
| 842 |
+
# URL input
|
| 843 |
url_input = gr.Textbox(
|
| 844 |
label="Article URL",
|
| 845 |
placeholder="Enter the article URL here...",
|
| 846 |
+
value="",
|
| 847 |
+
visible=True
|
| 848 |
)
|
| 849 |
+
|
| 850 |
+
# PDF upload
|
| 851 |
+
pdf_input = gr.File(
|
| 852 |
+
label="Upload PDF",
|
| 853 |
+
file_types=[".pdf"],
|
| 854 |
+
visible=False
|
| 855 |
+
)
|
| 856 |
+
|
| 857 |
with gr.Column(scale=1):
|
| 858 |
# 언어 선택 추가 — add language selection
|
| 859 |
language_selector = gr.Radio(
|
|
|
|
| 927 |
|
| 928 |
gr.Examples(
|
| 929 |
examples=[
|
| 930 |
+
["https://huggingface.co/blog/openfree/cycle-navigator", "URL", "Local", "Edge-TTS", "English"],
|
| 931 |
+
["https://www.bbc.com/news/technology-67988517", "URL", "Local", "Spark-TTS", "English"],
|
| 932 |
+
["https://huggingface.co/papers/2505.14810", "URL", "Local", "Edge-TTS", "Korean"],
|
| 933 |
],
|
| 934 |
+
inputs=[url_input, input_type_selector, mode_selector, tts_selector, language_selector],
|
| 935 |
outputs=[conversation_output, status_output],
|
| 936 |
fn=synthesize_sync,
|
| 937 |
cache_examples=False,
|
| 938 |
)
|
| 939 |
|
| 940 |
+
# Input type change handler
|
| 941 |
+
input_type_selector.change(
|
| 942 |
+
fn=toggle_input_visibility,
|
| 943 |
+
inputs=[input_type_selector],
|
| 944 |
+
outputs=[url_input, pdf_input]
|
| 945 |
+
)
|
| 946 |
+
|
| 947 |
# 언어 변경 시 TTS 엔진 옵션 업데이트 — update the TTS engine options when the language changes
|
| 948 |
language_selector.change(
|
| 949 |
fn=update_tts_engine_for_korean,
|
|
|
|
| 951 |
outputs=[tts_selector]
|
| 952 |
)
|
| 953 |
|
| 954 |
+
# 이벤트 연결 - 수정된 부분 — event wiring (revised section)
|
| 955 |
+
def get_article_input(input_type, url_input, pdf_input):
    """Pick the input value that matches the selected input type.

    Returns ``url_input`` when ``input_type`` is ``"URL"``; otherwise
    returns ``pdf_input``.
    """
    return url_input if input_type == "URL" else pdf_input
|
| 961 |
+
|
| 962 |
convert_btn.click(
|
| 963 |
+
fn=lambda input_type, url_input, pdf_input, mode, tts, lang: synthesize_sync(
|
| 964 |
+
get_article_input(input_type, url_input, pdf_input), input_type, mode, tts, lang
|
| 965 |
+
),
|
| 966 |
+
inputs=[input_type_selector, url_input, pdf_input, mode_selector, tts_selector, language_selector],
|
| 967 |
outputs=[conversation_output, status_output]
|
| 968 |
)
|
| 969 |
|