Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ import soundfile as sf
|
|
8 |
from pydub import AudioSegment
|
9 |
from telegram import Update
|
10 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
11 |
-
from transformers import pipeline
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
from collections import defaultdict
|
@@ -32,8 +32,15 @@ try:
|
|
32 |
token=os.getenv("HF_TOKEN")
|
33 |
)
|
34 |
|
35 |
-
# 2. نموذج توليف الصوت
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
except Exception as e:
|
39 |
logger.error(f"فشل تحميل النماذج: {str(e)}")
|
@@ -84,15 +91,18 @@ async def generate_response(text: str, user_id: str) -> str:
|
|
84 |
num_return_sequences=1,
|
85 |
pad_token_id=50256
|
86 |
)
|
87 |
-
return response[0]['generated_text']
|
88 |
except Exception as e:
|
89 |
logger.error(f"فشل توليد الرد: {str(e)}")
|
90 |
return "حدث خطأ في توليد الرد."
|
91 |
|
92 |
async def text_to_speech(text: str) -> None:
|
93 |
try:
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
96 |
except Exception as e:
|
97 |
logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
98 |
|
@@ -107,12 +117,8 @@ async def process_voice(update: Update, context):
|
|
107 |
await voice_file.download_to_drive("user_voice.ogg")
|
108 |
|
109 |
user_text = await speech_to_text("user_voice.ogg")
|
110 |
-
if not user_text:
|
111 |
-
await update.message.reply_text("لم أتمكن من فهم الصوت. يرجى المحاولة مرة أخرى.")
|
112 |
-
return
|
113 |
-
|
114 |
bot_response = await generate_response(user_text, str(user_id))
|
115 |
-
await
|
116 |
|
117 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
118 |
await update.message.reply_voice("bot_response_enhanced.wav")
|
|
|
8 |
from pydub import AudioSegment
|
9 |
from telegram import Update
|
10 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
11 |
+
from transformers import pipeline, AutoTokenizer, VitsModel
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
from collections import defaultdict
|
|
|
32 |
token=os.getenv("HF_TOKEN")
|
33 |
)
|
34 |
|
35 |
+
# 2. نموذج توليف الصوت الأنثوي (الاسم الصحيح)
|
36 |
+
tts_tokenizer = AutoTokenizer.from_pretrained(
|
37 |
+
"facebook/mms-tts-ara", # تم تغيير النموذج إلى فيسبوك MMS
|
38 |
+
token=os.getenv("HF_TOKEN")
|
39 |
+
)
|
40 |
+
tts_model = VitsModel.from_pretrained(
|
41 |
+
"facebook/mms-tts-ara",
|
42 |
+
token=os.getenv("HF_TOKEN")
|
43 |
+
)
|
44 |
|
45 |
except Exception as e:
|
46 |
logger.error(f"فشل تحميل النماذج: {str(e)}")
|
|
|
91 |
num_return_sequences=1,
|
92 |
pad_token_id=50256
|
93 |
)
|
94 |
+
return response[0]['generated_text']
|
95 |
except Exception as e:
|
96 |
logger.error(f"فشل توليد الرد: {str(e)}")
|
97 |
return "حدث خطأ في توليد الرد."
|
98 |
|
99 |
def _synthesize_to_wav(text: str) -> None:
    """Synthesize *text* with the TTS model and write it to bot_response.wav.

    This is synchronous, CPU-bound work (tokenization, model forward pass,
    file write) and is meant to be run off the asyncio event loop.
    """
    inputs = tts_tokenizer(text, return_tensors="pt")

    # The original call always passed speaker_id=1, commented as "select the
    # female voice". NOTE(review): VitsModel appears to consult speaker_id only
    # when config.num_speakers > 1, and the MMS-TTS checkpoints are believed to
    # be single-speaker, so the argument would not change the voice there --
    # confirm against the transformers docs. It is forwarded only when the
    # loaded model actually has several speakers, which keeps the previous
    # behaviour for multi-speaker checkpoints.
    speaker_kwargs = {}
    if getattr(tts_model.config, "num_speakers", 1) > 1:
        speaker_kwargs["speaker_id"] = 1

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = tts_model(**inputs, **speaker_kwargs)

    # First (and only) item of the batch, converted for soundfile.
    waveform = output.waveform[0].numpy()
    sf.write("bot_response.wav", waveform, tts_model.config.sampling_rate)


async def text_to_speech(text: str) -> None:
    """Convert *text* to speech and store the audio in bot_response.wav.

    The synthesis runs in the default executor so that the event loop (and
    therefore the handling of other updates) is not blocked while the model
    is running.

    Args:
        text: The text to synthesize.

    Returns:
        None. On success the audio is written to "bot_response.wav". Any
        failure is logged and swallowed, exactly as before, so callers never
        see an exception from this coroutine.
    """
    try:
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _synthesize_to_wav, text)
    except Exception as e:
        logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
108 |
|
|
|
117 |
await voice_file.download_to_drive("user_voice.ogg")
|
118 |
|
119 |
user_text = await speech_to_text("user_voice.ogg")
|
|
|
|
|
|
|
|
|
120 |
bot_response = await generate_response(user_text, str(user_id))
|
121 |
+
await text_to_speech(bot_response)
|
122 |
|
123 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
124 |
await update.message.reply_voice("bot_response_enhanced.wav")
|