File size: 2,705 Bytes
f20dbc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import telegram
from telegram import Bot
from telegram.ext import *
import PIL.Image
import os
import google.generativeai as genai
import requests
import speech_recognition as sr
import pydub
from pydub import AudioSegment
# Credentials come from the environment: "gemini_api" holds the Gemini API key
# and "tele_api" holds the Telegram bot token.
api_genai = os.environ.get("gemini_api")
api_tele = os.environ.get("tele_api")

# Pass the key through unchanged; wrapping it in an f-string would turn a
# missing variable into the literal string "None".
genai.configure(api_key=api_genai)
model = genai.GenerativeModel('gemini-pro-vision')

# Use the token read from the environment (previously the literal text
# "api_tele" was passed, which is never a valid token).
bot = Bot(api_tele)

# File ids of the photos received so far; the last entry is the photo that the
# next text message will be paired with.
# NOTE(review): this list is shared by every chat the bot serves, so a photo
# sent in one chat can be paired with text from another - confirm whether
# per-chat storage is wanted.
a = []


def start_command(update, context):
    """Greet the chat and ask for an image in response to /start.

    Args:
        update: Incoming Telegram update; ``update.message`` is replied to.
        context: Handler callback context (unused).
    """
    message = update.message
    chat = message.chat
    # first_name is only set for private chats; fall back to the chat title
    # (groups/channels) and finally to a neutral word so the greeting never
    # fails on string concatenation with None.
    name = chat.first_name or chat.title or "there"
    message.reply_text("Hello " + name)
    message.reply_text("Please share your image")

def _open_pending_image():
    """Open the most recently received photo, or return None if there is none."""
    if not a:
        return None
    try:
        return PIL.Image.open(f'{a[-1]}.png')
    except OSError:
        # The file is missing (already consumed and deleted) or is not a
        # readable image: behave as if no photo is waiting.
        return None


def _ask_gemini(model_name, prompt):
    """Return the text that the Gemini model *model_name* generates for *prompt*."""
    return genai.GenerativeModel(model_name).generate_content(prompt).text


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _answer_text(message, text):
    """Reply to *text*, pairing it with the pending photo when one exists."""
    img = _open_pending_image()
    if img is None:
        message.reply_text(_ask_gemini('gemini-pro', text))
        return

    image_path = f'{a[-1]}.png'
    try:
        print(text, a[-1])
        message.reply_text(_ask_gemini('gemini-pro-vision', [img, text]))
    finally:
        # Release the file handle before deleting the file.
        img.close()
    # Only reached on success, so a failed request leaves the photo pending.
    os.remove(image_path)


def _answer_voice(message, context):
    """Transcribe the voice note in *message* and reply with Gemini's answer."""
    voice_id = message.voice.file_id
    ogg_path = f'{voice_id}.ogg'
    wav_path = f'{voice_id}.wav'
    try:
        context.bot.get_file(voice_id).download(ogg_path)

        # Convert the audio file to WAV format; export() returns the open
        # output file, which is closed right away so it is not leaked.
        AudioSegment.from_ogg(ogg_path).export(wav_path, format="wav").close()

        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        try:
            transcript = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            message.reply_text("Sorry, I could not understand that voice message.")
            return
        except sr.RequestError:
            message.reply_text("Sorry, the speech recognition service is unavailable right now.")
            return

        print(transcript)
        message.reply_text(_ask_gemini('gemini-pro', transcript))
    finally:
        # Always clean up the temporary audio files, even when a step fails.
        for path in (wav_path, ogg_path):
            _remove_if_exists(path)


def image_handler(update, context):
    """Handle photo, text and voice messages.

    * Photo: download it as ``<file_id>.png`` and remember its id in ``a``;
      nothing is sent back until a text message follows.
    * Text: if a remembered photo can still be opened, send photo and text to
      'gemini-pro-vision', reply with the answer and delete the photo file;
      otherwise send the text alone to 'gemini-pro'.
    * Voice: download the note, convert it to WAV, transcribe it with
      ``recognize_google`` and reply with the 'gemini-pro' answer to the
      transcript.

    Args:
        update: Incoming Telegram update; ``update.message`` is inspected.
        context: Handler callback context; ``context.bot`` is used to fetch files.
    """
    message = update.message

    if message.photo:
        # The last entry of message.photo is the one that gets stored.
        file_id = message.photo[-1].file_id
        context.bot.get_file(file_id).download(f'{file_id}.png')
        a.append(file_id)
        text = None
    else:
        text = message.text

    if text is not None:
        _answer_text(message, text)

    if message.voice:
        _answer_voice(message, context)


def main():
    """Start the bot: register the handlers and poll Telegram until stopped.

    Raises:
        RuntimeError: If the "tele_api" environment variable is not set.
    """
    print("Started")
    # Use the token read from the environment; the literal text "api_tele"
    # that was passed before is not a token.
    if not api_tele:
        raise RuntimeError(
            'The "tele_api" environment variable must hold the Telegram bot token'
        )
    updater = Updater(api_tele, use_context=True)
    dp = updater.dispatcher
    dp.add_handler(CommandHandler("start", start_command))
    dp.add_handler(
        MessageHandler(
            Filters.text | Filters.photo | Filters.audio | Filters.voice,
            image_handler,
        )
    )

    updater.start_polling()
    # Block until the process is interrupted.
    updater.idle()

# Run the bot only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()