Spaces:
Sleeping
Sleeping
| import os | |
| import speech_recognition as sr | |
| import fitz # PyMuPDF | |
| from transformers import AutoTokenizer, AutoModel | |
| import torch | |
| import faiss | |
| import numpy as np | |
| from gtts import gTTS | |
| from pydub import AudioSegment | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| import gradio as gr | |
# Pull settings such as GROQ_API_KEY from a local .env file into the environment
load_dotenv()

# Groq API client, authenticated with the key read from the environment
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Tokenizer / encoder pair used to turn text into embeddings
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Flat L2 vector index whose width matches the encoder output
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder that is scanned for PDF documents
pdf_folder_path = "pdfsforRAG"
# Function to convert audio file to text
def audio_to_text(audio_file):
    """Transcribe an audio file with ``Recognizer.recognize_google``.

    Args:
        audio_file: Path (or file-like object) accepted by ``sr.AudioFile``.

    Returns:
        The recognized text. On failure a human-readable apology string is
        returned instead of raising: one message when the speech could not be
        understood and another when the recognition request itself failed.
    """
    speech_engine = sr.Recognizer()

    # Read the entire clip into memory before sending it for recognition.
    with sr.AudioFile(audio_file) as clip:
        recording = speech_engine.record(clip)

    try:
        return speech_engine.recognize_google(recording)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"
# Function to convert audio to WAV format
def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Re-encode an audio file as WAV and return the path of the result.

    Args:
        audio_file_path: Path of the source audio file, in any format that
            ``AudioSegment.from_file`` can decode.
        wav_path: Where to write the WAV file. Defaults to ``"temp_audio.wav"``
            in the current working directory, so concurrent callers that rely
            on the default overwrite each other's output.

    Returns:
        ``wav_path``, the location of the exported WAV file.
    """
    audio = AudioSegment.from_file(audio_file_path)
    # export() hands back the open output file object; close it explicitly so
    # the handle is not left dangling until garbage collection.
    exported = audio.export(wav_path, format="wav")
    exported.close()
    return wav_path
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Path of the PDF to read, as accepted by ``fitz.open``.

    Returns:
        A single string holding the text of all pages in page order
        (an empty string for a document without pages).
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_file) as pdf_document:
        # join() avoids repeatedly re-allocating a growing string.
        return "".join(page.get_text() for page in pdf_document)
# Function to embed text using a transformer model
def embed_text(texts, model, tokenizer):
    """Embed one or more texts by mean-pooling the model's last hidden state.

    Padding positions are excluded from the mean (using the tokenizer's
    attention mask) so that a text's embedding does not depend on the length
    of the other texts in the same batch.

    Args:
        texts: A string or a sequence of strings to embed.
        model: Callable taking the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor of shape
            (batch, tokens, hidden).
        tokenizer: Callable producing the model inputs; it is invoked with
            ``return_tensors='pt'``, ``truncation=True`` and ``padding=True``,
            so over-long texts are truncated rather than rejected.

    Returns:
        A NumPy array with one row per input text. An empty input yields an
        array with zero rows instead of raising.
    """
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to tokenize; report the model width when it is discoverable.
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 0)
        return np.empty((0, hidden_size), dtype=np.float32)

    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden_states = model(**inputs).last_hidden_state
        attention_mask = inputs.get("attention_mask")
        if attention_mask is None:
            # No mask available: fall back to a plain mean over all positions.
            pooled = hidden_states.mean(dim=1)
        else:
            weights = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
            token_counts = weights.sum(dim=1).clamp(min=1)  # avoid 0-division
            pooled = (hidden_states * weights).sum(dim=1) / token_counts
    return pooled.numpy()
# Function to convert text to speech
def text_to_speech(text, output_file):
    """Synthesize English speech for ``text`` and save it to ``output_file``.

    Args:
        text: The text to be spoken.
        output_file: Destination path handed to ``gTTS.save``.

    Returns:
        ``output_file``, unchanged, for convenient chaining.
    """
    speech = gTTS(lang='en', text=text)
    speech.save(output_file)
    return output_file
# Read all PDF files from the specified folder.
# A missing folder is treated like an empty one so the app can still start,
# the extension check is case-insensitive, and the paths are sorted so that
# index ids are assigned in a reproducible order (os.listdir order is arbitrary).
if os.path.isdir(pdf_folder_path):
    pdf_paths = sorted(
        os.path.join(pdf_folder_path, f)
        for f in os.listdir(pdf_folder_path)
        if f.lower().endswith('.pdf')
    )
else:
    pdf_paths = []
texts = [extract_text_from_pdf(path) for path in pdf_paths]

# Embed PDF texts and add to vector database.
# NOTE: embed_text tokenizes with truncation=True, so only the part of each
# PDF that fits the model's maximum input length contributes to its vector.
if texts:
    # FAISS expects a contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        embed_text(texts, model, tokenizer), dtype=np.float32
    )
    index.add(embeddings)
else:
    # Nothing to index; keep `embeddings` defined with the expected width.
    embeddings = np.empty((0, dimension), dtype=np.float32)
# Gradio Interface
def process_audio(audio_file):
    """Turn an uploaded spoken question into a text answer plus spoken audio.

    Steps: copy the upload to disk, convert it to WAV, transcribe it, ask the
    Groq chat model for a reply, and synthesize that reply as speech.

    Args:
        audio_file: Uploaded file object exposing ``.name`` and ``.read()``.

    Returns:
        A ``(response_text, audio_path)`` tuple, where ``audio_path`` points to
        the MP3 containing the spoken response.

    Raises:
        ValueError: If no file was uploaded (``audio_file`` is ``None``).
    """
    import tempfile  # local import: only this function needs it

    if audio_file is None:
        raise ValueError("No audio file was provided")

    # Save the uploaded audio file under a unique name. splitext() yields an
    # empty suffix for extension-less uploads instead of a bogus path, and the
    # unique name keeps concurrent requests from clobbering each other.
    suffix = os.path.splitext(audio_file.name)[1]
    with tempfile.NamedTemporaryFile(
        prefix="temp_audio_", suffix=suffix, delete=False
    ) as upload_copy:
        upload_copy.write(audio_file.read())
        audio_file_path = upload_copy.name

    try:
        # Convert audio to WAV format if needed
        wav_path = convert_to_wav(audio_file_path)
        # Convert audio to text
        text = audio_to_text(wav_path)
    finally:
        # The copy of the upload is no longer needed once it is transcribed.
        os.remove(audio_file_path)

    # audio_to_text reports failures as these plain strings; they are status
    # messages, not user questions, so relay them instead of sending them to
    # the language model.
    recognition_failures = (
        "Sorry, I did not understand the audio",
        "Sorry, there was a problem with the request",
    )
    if text in recognition_failures:
        response = text
    else:
        # Generate a response using the Groq API
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": text,
                }
            ],
            model="llama3-8b-8192",
        )
        response = chat_completion.choices[0].message.content

    # Convert advice to speech
    output_file = "advice.mp3"
    output_path = text_to_speech(response, output_file)
    return response, output_path
# Gradio UI: one uploaded audio clip in, the advice as text and as audio out
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.inputs.Audio(source="upload", type="file"),
    outputs=[
        gr.outputs.Textbox(label="Advice"),
        gr.outputs.Audio(label="Advice Audio"),
    ],
)

# Start the web app only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()