"""Voice assistant demo: speech -> text -> Groq LLM -> speech, served by Gradio.

At import time the script also embeds every PDF found in ``pdf_folder_path``
with BERT and stores the vectors in a FAISS index.

NOTE(review): the FAISS index is populated below, but ``process_audio`` never
queries it, so the PDFs do not currently influence the generated answer.
"""

import logging
import os

import speech_recognition as sr
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
from groq import Groq
from dotenv import load_dotenv
import gradio as gr

logger = logging.getLogger(__name__)

# Load environment variables from .env file
load_dotenv()

# Initialize Groq API client
client = Groq(
    api_key=os.getenv("GROQ_API_KEY"),
)

# Initialize model and tokenizer for embedding
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing PDFs
pdf_folder_path = "pdfsforRAG"


def audio_to_text(audio_file_path):
    """Transcribe an audio file with the Google Web Speech recognizer.

    Args:
        audio_file_path: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or a fixed apology string when the audio could
        not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file_path) as source:
        audio = recognizer.record(source)

    # Only the recognition call itself is expected to raise these errors.
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"


def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Convert any audio file pydub can read into a WAV file.

    Args:
        audio_file_path: Path of the input audio file.
        wav_path: Destination path for the WAV file. Defaults to the
            original fixed location ``temp_audio.wav``; pass a unique path
            when several conversions may run at the same time.

    Returns:
        The path the WAV file was written to.
    """
    audio = AudioSegment.from_file(audio_file_path)
    audio.export(wav_path, format="wav")
    return wav_path


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Path of the PDF to read.

    Returns:
        A single string with the page texts joined in page order.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_file) as pdf_document:
        return "".join(page.get_text() for page in pdf_document)


def embed_text(texts, model, tokenizer):
    """Embed texts as mean-pooled transformer hidden states.

    Args:
        texts: A string or a sequence of strings to embed.
        model: Transformer model whose output exposes ``last_hidden_state``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        A C-contiguous ``float32`` array of shape ``(len(texts), hidden_size)``
        suitable for ``faiss`` indexes. An empty input yields a ``(0,
        hidden_size)`` array instead of raising inside the tokenizer.

    Note:
        ``truncation=True`` caps every text at the tokenizer's maximum length
        (512 tokens for ``bert-base-uncased``), so only the beginning of a
        long document contributes to its embedding.
    """
    if isinstance(texts, str):
        texts = [texts]
    if not texts:
        return np.empty((0, model.config.hidden_size), dtype=np.float32)

    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
        # Average over real tokens only: a plain mean would also average the
        # padding positions, making an embedding depend on its batch-mates.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts
    return np.ascontiguousarray(pooled.numpy(), dtype=np.float32)


def text_to_speech(text, output_file):
    """Synthesize English speech for ``text`` and save it to ``output_file``.

    Args:
        text: The text to speak.
        output_file: Path the audio is saved to.

    Returns:
        ``output_file``, for convenient chaining.
    """
    tts = gTTS(text=text, lang='en')
    tts.save(output_file)
    return output_file


# Read all PDF files from the specified folder
if os.path.isdir(pdf_folder_path):
    # Sorted so that FAISS row ids map to the same files on every run.
    pdf_paths = [
        os.path.join(pdf_folder_path, name)
        for name in sorted(os.listdir(pdf_folder_path))
        if name.lower().endswith('.pdf')
    ]
else:
    logger.warning("PDF folder %r not found; the index will be empty",
                   pdf_folder_path)
    pdf_paths = []

texts = [extract_text_from_pdf(path) for path in pdf_paths]

# Embed PDF texts and add to vector database
embeddings = embed_text(texts, model, tokenizer)
if len(embeddings):
    index.add(embeddings)
else:
    logger.warning("No PDFs were indexed from %r", pdf_folder_path)


# Gradio Interface
def process_audio(audio_file_path):
    """Answer a spoken question with text and synthesized speech.

    Args:
        audio_file_path: Path of the recorded or uploaded audio, or ``None``
            when the form is submitted without audio.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is ``None``
        when there is nothing to speak.
    """
    # The form can be submitted with no recording, giving no path to convert.
    if not audio_file_path:
        return "Sorry, no audio was provided", None

    # Convert audio to WAV format if needed
    wav_path = convert_to_wav(audio_file_path)

    # Convert audio to text
    text = audio_to_text(wav_path)

    # Generate a response using the Groq API
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": text,
            }
        ],
        model="llama3-8b-8192",
    )
    response = chat_completion.choices[0].message.content

    # gTTS refuses empty text, so skip synthesis when the model said nothing.
    if not response:
        return "", None

    # Convert advice to speech
    output_file = "advice.mp3"
    output_path = text_to_speech(response, output_file)

    return response, output_path


# Define Gradio interface
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Handle file paths
    outputs=[gr.Textbox(label="Advice"), gr.Audio(label="Advice Audio")]
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()