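# NOTE: the next three comment lines are page residue captured with this file,
# not program text.
# Spaces:
# Runtime error
# Runtime error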
import tempfile

import gradio as gr
import speech_recognition as sr
import torch
from gtts import gTTS
from PIL import Image
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    BlipForImageTextRetrieval,
    WhisperForConditionalGeneration,
)
# Load the models and processors once at import time so every request reuses them.
image_model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco")
image_processor = AutoProcessor.from_pretrained("Salesforce/blip-itm-base-coco")
# NOTE(review): nothing in the visible code references the Whisper model or its
# tokenizer (speech-to-text goes through speech_recognition instead). They are
# kept so the module-level names stay available; confirm whether they can go.
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
whisper_tokenizer = AutoTokenizer.from_pretrained("openai/whisper-base")
# Image-text matching
def image_text_matching(img, text):
    """Tell whether ``text`` matches ``img`` according to the BLIP model.

    Args:
        img: PIL image supplied by the Gradio image input, or ``None`` when
            no image was uploaded.
        text: Caption to compare against the image.

    Returns:
        ``'Match'`` when the highest-scoring class index is 1, ``'No Match'``
        otherwise, or ``'No image uploaded'`` when ``img`` is ``None``.
    """
    # Without this guard a missing upload fails on ``None.convert``.
    if img is None:
        return 'No image uploaded'

    raw_image = img.convert('RGB')
    inputs = image_processor(images=raw_image, text=text, return_tensors="pt")
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = image_model(**inputs)
    # Scores for the first (only) image-text pair in the batch.
    # NOTE(review): presumably [no-match, match] logits — confirm against the
    # BLIP image-text-matching head documentation.
    result = outputs[0][0]
    softmax_result = torch.softmax(result, dim=0)
    max_index = torch.argmax(softmax_result).item()
    return 'Match' if max_index == 1 else 'No Match'
# Text-to-speech
def text_to_audio(text):
    """Synthesize ``text`` to an MP3 file with gTTS and return its path.

    Args:
        text: Text to speak. The voice language is English; change ``lang``
            to ``'ar'`` for Arabic text.

    Returns:
        Path of the generated MP3 file, or ``None`` when ``text`` is empty or
        whitespace-only (there is nothing to speak).
    """
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='en')
    # A unique file per call: a fixed "output.mp3" would be overwritten when
    # two requests run at the same time. The handle is closed before gTTS
    # writes to the path; the file is intentionally left on disk for the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    tts.save(audio_file)
    return audio_file
# Speech-to-text
def audio_to_text(audio):
    """Transcribe an audio file to Arabic text via ``recognize_google``.

    Args:
        audio: Path of the audio file to transcribe, or a falsy value when no
            audio was provided.

    Returns:
        The recognized text, or a short human-readable message when no audio
        was given, the file cannot be read, or recognition fails.
    """
    if not audio:
        return "No audio uploaded"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
    except ValueError:
        # Raised by sr.AudioFile when the file is not readable WAV/AIFF/FLAC.
        return "Unsupported audio format (use WAV, AIFF or FLAC)"

    try:
        return recognizer.recognize_google(audio_data, language='ar')
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as err:
        return f"Speech recognition request failed: {err}"
# Build the Gradio interface
def _run_all_tasks(img, text, audio):
    """Run the three demo tasks for one submission.

    Returns a tuple ``(match_label, speech_file, transcript)`` in the same
    order as the interface's ``outputs`` list.
    """
    return (
        image_text_matching(img, text),
        text_to_audio(text),
        audio_to_text(audio) if audio else "No audio uploaded",
    )


iface = gr.Interface(
    fn=_run_all_tasks,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Text"),
        # type="filepath": audio_to_text hands this value straight to sr.AudioFile.
        gr.Audio(label="Upload Audio", type="filepath"),
    ],
    outputs=["text", "audio", "text"],
    title="AI Project: Image-Text Matching and Audio Tasks",
    description="Upload an image and enter text to see if they match. Also, convert text to audio and audio to text.",
)

# Launch the interface
iface.launch()