Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,15 +3,16 @@ import logging
|
|
3 |
import threading
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
-
import librosa
|
7 |
import soundfile as sf
|
8 |
from pydub import AudioSegment
|
9 |
from telegram import Update
|
10 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
11 |
-
from transformers import
|
|
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
-
|
|
|
15 |
|
16 |
# ===== تهيئة التوكن =====
|
17 |
login(token=os.getenv("HF_TOKEN"))
|
@@ -25,39 +26,29 @@ logger = logging.getLogger(__name__)
|
|
25 |
|
26 |
# ===== تحميل النماذج =====
|
27 |
try:
|
28 |
-
# 1. نموذج التعرف على الكلام
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
)
|
34 |
-
|
35 |
-
# 2. نموذج توليف الصوت الأنثوي (الاسم الصحيح)
|
36 |
-
tts_tokenizer = AutoTokenizer.from_pretrained(
|
37 |
-
"facebook/mms-tts-ara", # تم تغيير النموذج إلى فيسبوك MMS
|
38 |
-
token=os.getenv("HF_TOKEN")
|
39 |
-
)
|
40 |
-
tts_model = VitsModel.from_pretrained(
|
41 |
-
"facebook/mms-tts-ara",
|
42 |
-
token=os.getenv("HF_TOKEN")
|
43 |
-
)
|
44 |
|
45 |
except Exception as e:
|
46 |
logger.error(f"فشل تحميل النماذج: {str(e)}")
|
47 |
raise
|
48 |
|
49 |
# ===== ذاكرة المحادثة =====
|
50 |
-
conversation_history =
|
|
|
51 |
|
52 |
# ===== دوال معالجة الصوت =====
|
53 |
def enhance_audio(input_path: str, output_path: str) -> bool:
|
54 |
try:
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
audio.export(output_path, format="wav")
|
61 |
return True
|
62 |
except Exception as e:
|
63 |
logger.error(f"خطأ في تحسين الصوت: {str(e)}")
|
@@ -65,65 +56,73 @@ def enhance_audio(input_path: str, output_path: str) -> bool:
|
|
65 |
|
66 |
async def speech_to_text(audio_path: str) -> str:
|
67 |
try:
|
68 |
-
|
69 |
-
sf.write("temp.wav", audio, sr)
|
70 |
-
result = asr_pipeline("temp.wav")
|
71 |
-
return result["text"]
|
72 |
except Exception as e:
|
73 |
logger.error(f"فشل التعرف على الصوت: {str(e)}")
|
74 |
return ""
|
75 |
|
76 |
async def generate_response(text: str, user_id: str) -> str:
|
77 |
try:
|
78 |
-
#
|
79 |
-
conversation_history[
|
80 |
-
|
|
|
|
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
context,
|
91 |
num_return_sequences=1,
|
92 |
-
|
|
|
|
|
93 |
)
|
94 |
-
|
|
|
|
|
95 |
except Exception as e:
|
96 |
logger.error(f"فشل توليد الرد: {str(e)}")
|
97 |
-
return "حدث خطأ
|
98 |
|
99 |
async def text_to_speech(text: str) -> None:
|
100 |
try:
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
106 |
except Exception as e:
|
107 |
logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
108 |
|
109 |
# ===== دوال التفاعل مع المستخدم =====
|
110 |
async def start(update: Update, context):
|
111 |
-
await update.message.reply_text("مرحبًا! أنا
|
112 |
|
113 |
async def process_voice(update: Update, context):
|
114 |
try:
|
115 |
-
user_id = update.message.from_user.id
|
116 |
voice_file = await update.message.voice.get_file()
|
117 |
await voice_file.download_to_drive("user_voice.ogg")
|
118 |
|
|
|
119 |
user_text = await speech_to_text("user_voice.ogg")
|
120 |
if not user_text:
|
121 |
await update.message.reply_text("لم أتمكن من فهم الصوت. يرجى المحاولة مرة أخرى.")
|
122 |
return
|
123 |
|
124 |
-
|
|
|
|
|
|
|
125 |
await text_to_speech(bot_response)
|
126 |
|
|
|
127 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
128 |
await update.message.reply_voice("bot_response_enhanced.wav")
|
129 |
else:
|
@@ -142,10 +141,7 @@ def run_bot():
|
|
142 |
application.add_handler(CommandHandler("start", start))
|
143 |
application.add_handler(MessageHandler(filters.VOICE, process_voice))
|
144 |
|
145 |
-
application.run_polling(
|
146 |
-
close_loop=False,
|
147 |
-
stop_signals=[]
|
148 |
-
)
|
149 |
|
150 |
if __name__ == "__main__":
|
151 |
bot_thread = threading.Thread(target=run_bot, daemon=True)
|
|
|
3 |
import threading
|
4 |
import numpy as np
|
5 |
import torch
|
|
|
6 |
import soundfile as sf
|
7 |
from pydub import AudioSegment
|
8 |
from telegram import Update
|
9 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
10 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
11 |
+
from TTS.api import TTS
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
+
import noisereduce as nr
|
15 |
+
from scipy.io import wavfile
|
16 |
|
17 |
# ===== تهيئة التوكن =====
|
18 |
login(token=os.getenv("HF_TOKEN"))
|
|
|
26 |
|
27 |
# ===== تحميل النماذج =====
|
28 |
try:
    # 1. Text-to-speech model (Coqui XTTS v2). It is loaded on CPU
    #    (gpu=False) with the download progress bar disabled.
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
        gpu=False,
    )

    # 2. Arabic causal language model used to generate the bot's replies.
    # NOTE(review): the aragpt2-mega model card may require
    # trust_remote_code=True (custom Grover-style GPT-2 class) - confirm
    # that the plain Auto* classes load this checkpoint correctly.
    tokenizer = AutoTokenizer.from_pretrained("aubmindlab/aragpt2-mega")
    model = AutoModelForCausalLM.from_pretrained("aubmindlab/aragpt2-mega")

except Exception as e:
    # Log with the full traceback, then re-raise: the bot cannot run
    # without its models, so start-up must fail loudly.
    logger.exception("فشل تحميل النماذج: %s", e)
    raise
|
39 |
|
40 |
# ===== ذاكرة المحادثة =====
|
41 |
+
# Per-user conversation memory: maps a user id to the list of that user's
# most recent messages (trimmed by generate_response).
conversation_history: dict = {}

# Maximum number of messages remembered per user.
MAX_HISTORY: int = 3
|
43 |
|
44 |
# ===== دوال معالجة الصوت =====
|
45 |
def enhance_audio(input_path: str, output_path: str) -> bool:
    """Denoise and peak-normalize a WAV file.

    Reads ``input_path``, applies stationary noise reduction, scales the
    signal so its peak magnitude is 1.0, and writes the result to
    ``output_path`` as 16-bit PCM.

    Args:
        input_path: Path of the WAV file to read.
        output_path: Path of the WAV file to write.

    Returns:
        True when the enhanced file was written, False when any step failed
        (the error is logged, not raised).
    """
    try:
        rate, data = wavfile.read(input_path)

        # Work in floating point so integer PCM input is neither truncated
        # nor overflowed by the processing below.
        samples = np.asarray(data, dtype=np.float32)
        reduced_noise = nr.reduce_noise(y=samples, sr=rate, stationary=True)

        # Normalize to full scale, but never divide by zero: an empty or
        # completely silent clip has no peak and is left as it is.
        peak = float(np.max(np.abs(reduced_noise))) if reduced_noise.size else 0.0
        if peak > 0.0:
            reduced_noise = reduced_noise / peak

        # Write 16-bit PCM rather than a float64 WAV, which many decoders
        # do not accept.
        pcm = (np.clip(reduced_noise, -1.0, 1.0) * 32767.0).astype(np.int16)
        wavfile.write(output_path, rate, pcm)
        return True
    except Exception as e:
        logger.error(f"خطأ في تحسين الصوت: {str(e)}")
        return False
|
|
|
56 |
|
57 |
# Lazily created speech-recognition pipeline (see _get_asr_pipeline).
_asr_pipeline = None


def _get_asr_pipeline():
    """Build the speech-recognition pipeline on first use and cache it."""
    global _asr_pipeline
    if _asr_pipeline is None:
        # Local import: the file already depends on transformers, but only
        # imports the tokenizer/model classes at module level.
        from transformers import pipeline

        # NOTE(review): the checkpoint is a reviewer's choice of a
        # multilingual ASR model that covers Arabic - swap in the project's
        # preferred model if there is one.
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
        )
    return _asr_pipeline


async def speech_to_text(audio_path: str) -> str:
    """Transcribe an Arabic audio file to text.

    The previous implementation called ``tts.transcribe``; the Coqui ``TTS``
    object is a text-to-speech synthesizer and has no such method, so every
    call failed and an empty string was returned. Transcription is now done
    by a speech-recognition pipeline, executed in a worker thread so the
    event loop is not blocked while the model runs.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The recognized text, or an empty string when recognition fails
        (the error is logged, not raised).
    """
    try:
        def _transcribe():
            return _get_asr_pipeline()(
                audio_path,
                generate_kwargs={"language": "arabic", "task": "transcribe"},
            )

        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, _transcribe)
        return result["text"].strip()
    except Exception as e:
        logger.error(f"فشل التعرف على الصوت: {str(e)}")
        return ""
|
63 |
|
64 |
async def generate_response(text: str, user_id: str) -> str:
    """Generate the bot's reply to a user's message.

    The message is appended to the user's conversation memory (trimmed to
    the newest ``MAX_HISTORY`` entries), a prompt is built from that memory,
    and the language model continues the prompt. Only the first line of the
    newly generated text is returned.

    Args:
        text: The user's latest message.
        user_id: Key under which the user's history is stored.

    Returns:
        The generated reply, or a fixed apology message when generation
        fails or produces no text.
    """
    fallback = "عذرًا، حدث خطأ ما. يرجى المحاولة لاحقًا."
    try:
        # Conversation memory: keep only the newest MAX_HISTORY messages.
        history = (conversation_history.get(user_id, []) + [text])[-MAX_HISTORY:]
        conversation_history[user_id] = history

        # Prompt: every remembered message on its own line, then the cue
        # that asks the model to answer as the bot.
        input_text = "المحادثة:\n" + "\n".join(f"أنت: {msg}" for msg in history) + "\nالبوت:"
        inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

        def _generate():
            # Inference only: no gradients are needed.
            with torch.no_grad():
                return model.generate(
                    inputs,
                    # Budget for NEW tokens. max_length would also count the
                    # prompt, leaving no room once the prompt is long.
                    max_new_tokens=200,
                    num_return_sequences=1,
                    # temperature only takes effect when sampling is on.
                    do_sample=True,
                    temperature=0.7,
                    repetition_penalty=1.5,
                    pad_token_id=tokenizer.eos_token_id,
                )

        # Generation is CPU-bound; run it in a worker thread so the event
        # loop keeps serving other updates.
        outputs = await asyncio.get_running_loop().run_in_executor(None, _generate)

        # Decode only the tokens produced after the prompt instead of
        # splitting the full text on the bot marker.
        new_tokens = outputs[0][inputs.shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        reply = response.split("\n")[0].strip()

        # An empty reply cannot be synthesized to speech; fall back instead.
        return reply or fallback
    except Exception as e:
        logger.error(f"فشل توليد الرد: {str(e)}")
        return fallback
|
91 |
|
92 |
async def text_to_speech(
    text: str,
    speaker: str = "Ana Florence",
    file_path: str = "bot_response.wav",
) -> None:
    """Synthesize Arabic speech for ``text`` and write it to a WAV file.

    Args:
        text: The text to speak. Blank text is skipped.
        speaker: Name of the built-in XTTS v2 voice to use.
            NOTE(review): the previous hard-coded value "ar_female_1" does
            not appear to be one of the XTTS v2 built-in speaker names;
            "Ana Florence" is the female voice used in the Coqui
            documentation - confirm against the installed model's speakers.
        file_path: Where the generated audio is written.

    Returns:
        None. Failures are logged, not raised.
    """
    try:
        if not text or not text.strip():
            logger.warning("text_to_speech called with empty text; nothing synthesized")
            return

        def _synthesize():
            tts.tts_to_file(
                text=text,
                file_path=file_path,
                speaker=speaker,
                language="ar",
            )

        # Synthesis is slow on CPU; keep it off the event loop.
        await asyncio.get_running_loop().run_in_executor(None, _synthesize)
    except Exception as e:
        logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
102 |
|
103 |
# ===== دوال التفاعل مع المستخدم =====
|
104 |
async def start(update: Update, context):
    """Handle the /start command by sending the greeting message.

    Args:
        update: The incoming Telegram update.
        context: The handler context (unused).
    """
    # effective_message also covers edited messages, for which
    # update.message is None; with no message at all there is nothing to
    # reply to.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("مرحبًا! أنا البوت الصوتي الأنثوي 🎤\nأرسل لي رسالة صوتية وسأرد عليك بصوت أنثوي واضح.")
|
106 |
|
107 |
async def process_voice(update: Update, context):
|
108 |
try:
|
109 |
+
user_id = str(update.message.from_user.id)
|
110 |
voice_file = await update.message.voice.get_file()
|
111 |
await voice_file.download_to_drive("user_voice.ogg")
|
112 |
|
113 |
+
# تحويل الصوت إلى نص
|
114 |
user_text = await speech_to_text("user_voice.ogg")
|
115 |
if not user_text:
|
116 |
await update.message.reply_text("لم أتمكن من فهم الصوت. يرجى المحاولة مرة أخرى.")
|
117 |
return
|
118 |
|
119 |
+
# توليد الرد
|
120 |
+
bot_response = await generate_response(user_text, user_id)
|
121 |
+
|
122 |
+
# تحويل الرد إلى صوت
|
123 |
await text_to_speech(bot_response)
|
124 |
|
125 |
+
# تحسين الصوت
|
126 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
127 |
await update.message.reply_voice("bot_response_enhanced.wav")
|
128 |
else:
|
|
|
141 |
application.add_handler(CommandHandler("start", start))
|
142 |
application.add_handler(MessageHandler(filters.VOICE, process_voice))
|
143 |
|
144 |
+
application.run_polling()
|
|
|
|
|
|
|
145 |
|
146 |
if __name__ == "__main__":
|
147 |
bot_thread = threading.Thread(target=run_bot, daemon=True)
|