"""Telegram bot that relays text, photo and voice messages to Google Gemini.

Configuration is read from two environment variables:

* ``gemini_api`` - API key passed to ``google.generativeai``.
* ``tele_api``   - Telegram bot token.
"""

import os

import telegram
from telegram import Bot
from telegram.ext import *
import PIL.Image
import google.generativeai as genai
import requests
import speech_recognition as sr
import pydub
from pydub import AudioSegment

api_genai = os.environ.get("gemini_api")
api_tele = os.environ.get("tele_api")

genai.configure(api_key=f'{api_genai}')

# Build both models once instead of on every incoming message.
model = genai.GenerativeModel('gemini-pro-vision')
_text_model = genai.GenerativeModel('gemini-pro')

# The token comes from the environment; the previous code passed the literal
# string "api_tele" here instead of the variable's value.
bot = Bot(api_tele) if api_tele else None

# Key under which each chat's not-yet-used photo file id is kept in
# ``context.chat_data``.  Keeping this state per chat prevents a photo sent in
# one chat from being answered with text sent in another.
_PENDING_IMAGE_KEY = "pending_image"


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def start_command(update, context):
    """Handle ``/start``: greet the chat and ask for an image.

    Args:
        update: Incoming Telegram update carrying the ``/start`` message.
        context: Handler callback context (unused).
    """
    chat = update.message.chat
    # ``first_name`` may be None; fall back so the concatenation below
    # cannot raise TypeError.
    name = chat.first_name or chat.title or "there"
    update.message.reply_text("Hello " + name)
    update.message.reply_text("Please share your image")


def _store_photo(message, context):
    """Download the last entry of ``message.photo`` and remember it."""
    file_id = message.photo[-1].file_id

    # A newer photo supersedes an unanswered one; drop the old file so
    # downloads do not pile up on disk.
    previous = context.chat_data.pop(_PENDING_IMAGE_KEY, None)
    if previous is not None:
        _remove_quietly(f'{previous}.png')

    context.bot.get_file(file_id).download(f'{file_id}.png')
    context.chat_data[_PENDING_IMAGE_KEY] = file_id


def _answer_text(message, context):
    """Reply to a text message, pairing it with the chat's pending photo if any."""
    text = message.text
    file_id = context.chat_data.get(_PENDING_IMAGE_KEY)

    if file_id is not None:
        image_path = f'{file_id}.png'
        try:
            img = PIL.Image.open(image_path)
        except OSError:
            # File is missing or unreadable: forget it and answer the text alone.
            context.chat_data.pop(_PENDING_IMAGE_KEY, None)
            _remove_quietly(image_path)
        else:
            print(text, file_id)
            # Close the image handle before the file is deleted below.
            with img:
                response = model.generate_content([img, text])
            message.reply_text(response.text)
            # Only discard the photo once the reply went out, so a failed
            # request can be retried with the same image.
            context.chat_data.pop(_PENDING_IMAGE_KEY, None)
            _remove_quietly(image_path)
            return

    response = _text_model.generate_content(text)
    message.reply_text(response.text)


def _answer_voice(message, context):
    """Transcribe a voice message and reply with Gemini's answer to the transcript."""
    file_id = message.voice.file_id
    ogg_path = f'{file_id}.ogg'
    wav_path = f'{file_id}.wav'

    try:
        context.bot.get_file(file_id).download(ogg_path)

        # Convert the audio file to WAV format so speech_recognition can read
        # it.  ``export`` returns the output file handle, which is closed right
        # away so it does not linger until cleanup.
        AudioSegment.from_ogg(ogg_path).export(wav_path, format="wav").close()

        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        transcript = recognizer.recognize_google(audio_data)
        print(transcript)

        response = _text_model.generate_content(transcript)
        message.reply_text(response.text)
    finally:
        # Clean up the temporary files even when any step above raised.
        _remove_quietly(wav_path)
        _remove_quietly(ogg_path)


def image_handler(update, context):
    """Dispatch an incoming message to the photo, text or voice flow.

    * A photo is downloaded and remembered for the chat; nothing is replied.
    * A text message is sent to ``gemini-pro-vision`` together with the
      chat's pending photo when there is one, otherwise to ``gemini-pro``.
    * A voice message is transcribed and the transcript is sent to
      ``gemini-pro``.

    Args:
        update: Incoming Telegram update.
        context: Handler callback context; ``context.bot`` is used for
            downloads and ``context.chat_data`` for the pending photo.
    """
    message = update.message

    if message.photo:
        _store_photo(message, context)
    elif message.text is not None:
        _answer_text(message, context)

    if message.voice:
        _answer_voice(message, context)


def main():
    """Register the handlers and poll Telegram until interrupted.

    Raises:
        RuntimeError: If the ``tele_api`` environment variable is not set.
    """
    if not api_tele:
        raise RuntimeError(
            'Environment variable "tele_api" must contain the Telegram bot token'
        )

    print("Started")
    # Use the real token; the previous code passed the literal "api_tele".
    updater = Updater(api_tele)
    dp = updater.dispatcher

    dp.add_handler(CommandHandler("start", start_command))
    dp.add_handler(
        MessageHandler(
            Filters.text | Filters.photo | Filters.audio | Filters.voice,
            image_handler,
        )
    )

    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()