Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,16 +3,15 @@ import logging
|
|
3 |
import threading
|
4 |
import numpy as np
|
5 |
import torch
|
|
|
6 |
import soundfile as sf
|
7 |
from pydub import AudioSegment
|
8 |
from telegram import Update
|
9 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
10 |
-
from transformers import AutoTokenizer,
|
11 |
-
from TTS.api import TTS
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
-
|
15 |
-
from scipy.io import wavfile
|
16 |
|
17 |
# ===== تهيئة التوكن =====
|
18 |
login(token=os.getenv("HF_TOKEN"))
|
@@ -26,29 +25,39 @@ logger = logging.getLogger(__name__)
|
|
26 |
|
27 |
# ===== تحميل النماذج =====
|
28 |
try:
|
29 |
-
# 1. نموذج التعرف على الكلام
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
except Exception as e:
|
37 |
logger.error(f"فشل تحميل النماذج: {str(e)}")
|
38 |
raise
|
39 |
|
40 |
# ===== ذاكرة المحادثة =====
|
41 |
-
conversation_history =
|
42 |
-
MAX_HISTORY = 3
|
43 |
|
44 |
# ===== دوال معالجة الصوت =====
|
45 |
def enhance_audio(input_path: str, output_path: str) -> bool:
|
46 |
try:
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
52 |
return True
|
53 |
except Exception as e:
|
54 |
logger.error(f"خطأ في تحسين الصوت: {str(e)}")
|
@@ -56,73 +65,65 @@ def enhance_audio(input_path: str, output_path: str) -> bool:
|
|
56 |
|
57 |
async def speech_to_text(audio_path: str) -> str:
|
58 |
try:
|
59 |
-
|
|
|
|
|
|
|
60 |
except Exception as e:
|
61 |
logger.error(f"فشل التعرف على الصوت: {str(e)}")
|
62 |
return ""
|
63 |
|
64 |
async def generate_response(text: str, user_id: str) -> str:
|
65 |
try:
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
history = history[-MAX_HISTORY:]
|
70 |
-
conversation_history[user_id] = history
|
71 |
-
|
72 |
-
# توليد الرد مع ضوابط الجودة
|
73 |
-
input_text = "المحادثة:\n" + "\n".join([f"أنت: {msg}" for msg in history]) + "\nالبوت:"
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
max_length=
|
80 |
-
num_return_sequences=1,
|
81 |
temperature=0.7,
|
82 |
-
repetition_penalty=1.5,
|
83 |
-
pad_token_id=tokenizer.eos_token_id
|
84 |
)
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
88 |
except Exception as e:
|
89 |
logger.error(f"فشل توليد الرد: {str(e)}")
|
90 |
-
return "
|
91 |
|
92 |
async def text_to_speech(text: str) -> None:
|
93 |
try:
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
)
|
100 |
except Exception as e:
|
101 |
logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
102 |
|
103 |
# ===== دوال التفاعل مع المستخدم =====
|
104 |
async def start(update: Update, context):
|
105 |
-
await update.message.reply_text("مرحبًا! أنا
|
106 |
|
107 |
async def process_voice(update: Update, context):
|
108 |
try:
|
109 |
-
user_id =
|
110 |
voice_file = await update.message.voice.get_file()
|
111 |
await voice_file.download_to_drive("user_voice.ogg")
|
112 |
|
113 |
-
# تحويل الصوت إلى نص
|
114 |
user_text = await speech_to_text("user_voice.ogg")
|
115 |
if not user_text:
|
116 |
await update.message.reply_text("لم أتمكن من فهم الصوت. يرجى المحاولة مرة أخرى.")
|
117 |
return
|
118 |
|
119 |
-
|
120 |
-
bot_response = await generate_response(user_text, user_id)
|
121 |
-
|
122 |
-
# تحويل الرد إلى صوت
|
123 |
await text_to_speech(bot_response)
|
124 |
|
125 |
-
# تحسين الصوت
|
126 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
127 |
await update.message.reply_voice("bot_response_enhanced.wav")
|
128 |
else:
|
@@ -141,7 +142,10 @@ def run_bot():
|
|
141 |
application.add_handler(CommandHandler("start", start))
|
142 |
application.add_handler(MessageHandler(filters.VOICE, process_voice))
|
143 |
|
144 |
-
application.run_polling(
|
|
|
|
|
|
|
145 |
|
146 |
if __name__ == "__main__":
|
147 |
bot_thread = threading.Thread(target=run_bot, daemon=True)
|
|
|
3 |
import threading
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
+
import librosa
|
7 |
import soundfile as sf
|
8 |
from pydub import AudioSegment
|
9 |
from telegram import Update
|
10 |
from telegram.ext import ApplicationBuilder, MessageHandler, filters, CommandHandler
|
11 |
+
from transformers import pipeline, AutoTokenizer, VitsModel
|
|
|
12 |
from huggingface_hub import login
|
13 |
import asyncio
|
14 |
+
from collections import defaultdict
|
|
|
15 |
|
16 |
# ===== تهيئة التوكن =====
|
17 |
login(token=os.getenv("HF_TOKEN"))
|
|
|
25 |
|
26 |
# ===== تحميل النماذج =====
|
27 |
try:
    # Look the access token up once and reuse it for every download below.
    _hf_token = os.getenv("HF_TOKEN")
    _tts_repo = "mohamedhossam/arabic-tts"

    # 1. Speech-recognition model
    asr_pipeline = pipeline(
        task="automatic-speech-recognition",
        model="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
        token=_hf_token,
    )

    # 2. Female-voice speech-synthesis model (original note: "accurate model")
    tts_tokenizer = AutoTokenizer.from_pretrained(_tts_repo, token=_hf_token)
    tts_model = VitsModel.from_pretrained(_tts_repo, token=_hf_token)

except Exception as exc:
    # Log the failure, then let it propagate: the bot cannot run without models.
    logger.error(f"فشل تحميل النماذج: {str(exc)}")
    raise
|
48 |
|
49 |
# ===== ذاكرة المحادثة =====
|
50 |
+
conversation_history = defaultdict(list)
|
|
|
51 |
|
52 |
# ===== دوال معالجة الصوت =====
|
53 |
def enhance_audio(input_path: str, output_path: str) -> bool:
|
54 |
try:
|
55 |
+
audio = AudioSegment.from_wav(input_path)
|
56 |
+
audio = audio.low_pass_filter(3000)
|
57 |
+
audio = audio.high_pass_filter(100)
|
58 |
+
audio = audio.normalize()
|
59 |
+
audio = audio.fade_in(150).fade_out(150)
|
60 |
+
audio.export(output_path, format="wav")
|
61 |
return True
|
62 |
except Exception as e:
|
63 |
logger.error(f"خطأ في تحسين الصوت: {str(e)}")
|
|
|
65 |
|
66 |
async def speech_to_text(audio_path: str) -> str:
    """Transcribe the audio file at ``audio_path`` and return the recognised text.

    The file is decoded and resampled to 16 kHz with ``librosa`` and the
    samples are handed to the module-level ``asr_pipeline`` in memory. No
    intermediate ``temp.wav`` is written, so concurrent requests cannot
    overwrite each other's audio and nothing is left behind on disk.

    Decoding and inference are blocking, so they run in the default executor
    instead of on the event loop.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcription with surrounding whitespace removed, or ``""`` when
        the audio is empty or decoding/recognition fails (the error is logged).
    """

    def _transcribe() -> str:
        samples, sample_rate = librosa.load(audio_path, sr=16000)
        if samples.size == 0:
            return ""
        # The pipeline accepts raw samples plus their sampling rate directly.
        result = asr_pipeline({"raw": samples, "sampling_rate": sample_rate})
        return (result.get("text") or "").strip()

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _transcribe)
    except Exception as e:
        logger.error(f"فشل التعرف على الصوت: {str(e)}")
        return ""
|
75 |
|
76 |
# Number of most recent user messages kept per user and used as the prompt.
_MAX_HISTORY_TURNS = 3
# Upper bound on newly generated tokens (the prompt is not counted).
_MAX_NEW_TOKENS = 50

# Lazily created text-generation pipeline, shared by all requests.
_chatbot = None
_chatbot_lock = threading.Lock()


def _get_chatbot():
    """Return the shared text-generation pipeline, creating it on first use."""
    global _chatbot
    with _chatbot_lock:
        if _chatbot is None:
            _chatbot = pipeline(
                "text-generation",
                model="aubmindlab/aragpt2-base",
                token=os.getenv("HF_TOKEN"),
            )
        return _chatbot


def _generate_reply(prompt: str) -> str:
    """Run one blocking generation for ``prompt`` and return only the new text."""
    generator = _get_chatbot()
    outputs = generator(
        prompt,
        max_new_tokens=_MAX_NEW_TOKENS,
        # temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        num_return_sequences=1,
        # Use the loaded tokenizer's own id instead of a hard-coded value.
        pad_token_id=generator.tokenizer.eos_token_id,
        # Without this the result starts with the prompt itself.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()


async def generate_response(text: str, user_id: str) -> str:
    """Generate the bot's reply to ``text`` using the user's recent messages.

    ``text`` is appended to ``conversation_history[user_id]``, the history is
    trimmed to the last ``_MAX_HISTORY_TURNS`` messages, and those messages
    (newline-joined) form the prompt. Generation is blocking, so it runs in
    the default executor rather than on the event loop.

    Args:
        text: The user's latest message.
        user_id: Key identifying the user's history in ``conversation_history``.

    Returns:
        The generated continuation (prompt excluded, whitespace stripped), or
        a fixed error message if generation fails or yields no text.
    """
    fallback = "حدث خطأ في توليد الرد."
    try:
        # Update the conversation memory and keep it bounded.
        history = conversation_history[user_id]
        history.append(text)
        del history[:-_MAX_HISTORY_TURNS]
        prompt = "\n".join(history)

        loop = asyncio.get_running_loop()
        reply = await loop.run_in_executor(None, _generate_reply, prompt)
        if not reply:
            # An empty reply cannot be synthesised; treat it as a failure.
            logger.error("فشل توليد الرد: empty generation")
            return fallback
        return reply
    except Exception as e:
        logger.error(f"فشل توليد الرد: {str(e)}")
        return fallback
|
98 |
|
99 |
async def text_to_speech(text: str) -> None:
    """Synthesise ``text`` to speech and write it to ``bot_response.wav``.

    Any ``bot_response.wav`` left over from an earlier request is removed
    first. If synthesis fails, the caller then finds no file instead of
    silently sending a previous reply's audio.

    Synthesis is blocking, so it runs in the default executor rather than on
    the event loop.

    NOTE(review): the output path is fixed because the caller reads that exact
    file name, so overlapping requests can still overwrite each other's audio.

    Args:
        text: The text to speak; must contain non-whitespace characters.

    Returns:
        None. Failures are logged and no output file is left behind.
    """
    output_path = "bot_response.wav"

    def _synthesize() -> None:
        inputs = tts_tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            # Speaker 0: per the original author's note, the female voice.
            output = tts_model(**inputs, speaker_id=0)
        # .cpu() keeps this working if the model is ever moved to an accelerator.
        waveform = output.waveform[0].cpu().numpy()
        sf.write(output_path, waveform, tts_model.config.sampling_rate)

    try:
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        if not text or not text.strip():
            raise ValueError("no text to synthesise")

        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _synthesize)
    except Exception as e:
        logger.error(f"فشل تحويل النص إلى صوت: {str(e)}")
|
108 |
|
109 |
# ===== دوال التفاعل مع المستخدم =====
|
110 |
async def start(update: Update, context):
    """Answer the ``start`` command with the bot's greeting and usage hint."""
    greeting = (
        "مرحبًا! أنا بوت الدردشة الصوتية الأنثوي 🎤\n"
        "أرسل لي رسالة صوتية وسأرد عليك بصوت أنثوي واضح."
    )
    await update.message.reply_text(greeting)
|
112 |
|
113 |
async def process_voice(update: Update, context):
|
114 |
try:
|
115 |
+
user_id = update.message.from_user.id
|
116 |
voice_file = await update.message.voice.get_file()
|
117 |
await voice_file.download_to_drive("user_voice.ogg")
|
118 |
|
|
|
119 |
user_text = await speech_to_text("user_voice.ogg")
|
120 |
if not user_text:
|
121 |
await update.message.reply_text("لم أتمكن من فهم الصوت. يرجى المحاولة مرة أخرى.")
|
122 |
return
|
123 |
|
124 |
+
bot_response = await generate_response(user_text, str(user_id))
|
|
|
|
|
|
|
125 |
await text_to_speech(bot_response)
|
126 |
|
|
|
127 |
if enhance_audio("bot_response.wav", "bot_response_enhanced.wav"):
|
128 |
await update.message.reply_voice("bot_response_enhanced.wav")
|
129 |
else:
|
|
|
142 |
application.add_handler(CommandHandler("start", start))
|
143 |
application.add_handler(MessageHandler(filters.VOICE, process_voice))
|
144 |
|
145 |
+
application.run_polling(
|
146 |
+
close_loop=False,
|
147 |
+
stop_signals=[]
|
148 |
+
)
|
149 |
|
150 |
if __name__ == "__main__":
|
151 |
bot_thread = threading.Thread(target=run_bot, daemon=True)
|