Spaces:

Ayesha931
/

ChillMama3.0

Runtime error

App Files Files Community

Ayesha931 committed on Aug 31, 2024

Commit

20a391c

verified ·

1 Parent(s): 77c5144

Create app.py

Browse files

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import os
+import speech_recognition as sr
+import fitz  # PyMuPDF
+from transformers import AutoTokenizer, AutoModel
+import torch
+import faiss
+import numpy as np
+from gtts import gTTS
+from pydub import AudioSegment
+from groq import Groq
+from dotenv import load_dotenv
+import gradio as gr
# Read settings (including GROQ_API_KEY) from the .env file into the environment
load_dotenv()

# Groq API client, authenticated with the key taken from the environment
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Tokenizer and encoder used to turn text into embeddings
_EMBEDDING_MODEL_NAME = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(_EMBEDDING_MODEL_NAME)
model = AutoModel.from_pretrained(_EMBEDDING_MODEL_NAME)

# Vector database: a flat L2 FAISS index sized to the BERT embedding width
dimension = 768
index = faiss.IndexFlatL2(dimension)

# Folder that holds the PDF documents to index
pdf_folder_path = "pdfsforRAG"
# Transcribe an audio file to text
def audio_to_text(audio_file_path):
    """Transcribe the speech in an audio file via ``recognize_google``.

    Args:
        audio_file_path: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The recognized text. When recognition raises ``sr.UnknownValueError``
        or ``sr.RequestError`` a fixed apology message is returned instead
        of propagating the exception.
    """
    speech_recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file_path) as audio_source:
        # Read the whole file into memory as a single audio clip
        recorded = speech_recognizer.record(audio_source)

    try:
        transcript = speech_recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"
    return transcript
# Re-encode an audio file as WAV
def convert_to_wav(audio_file_path):
    """Export the given audio file in WAV format.

    Args:
        audio_file_path: Path of the input file passed to
            ``AudioSegment.from_file``.

    Returns:
        The path of the written WAV file, always ``"temp_audio.wav"`` in the
        current working directory (overwritten on every call).
    """
    wav_path = "temp_audio.wav"
    AudioSegment.from_file(audio_file_path).export(wav_path, format="wav")
    return wav_path
# Extract the text of every page of a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages in a PDF.

    Args:
        pdf_file: Path of the PDF, passed to ``fitz.open``.

    Returns:
        One string holding the output of ``page.get_text()`` for each page,
        in page order, with nothing inserted between pages.
    """
    # Opening the document as a context manager closes it when done, even if
    # a page fails to parse; the original left the document open.
    with fitz.open(pdf_file) as pdf_document:
        return "".join(page.get_text() for page in pdf_document)
# Embed text with a transformer model (mean pooling over real tokens)
def embed_text(texts, model, tokenizer):
    """Embed one or more texts as fixed-size vectors.

    Each text is tokenized (truncated to the tokenizer's limit, padded to
    the longest item in the batch), run through ``model`` without gradient
    tracking, and reduced to one vector by averaging the last hidden state
    over the token positions.

    Padding positions are excluded from the average by weighting with the
    tokenizer's attention mask; otherwise shorter texts in a padded batch
    would have their embeddings pulled towards the padding vectors.

    Args:
        texts: A string or a list/tuple of strings.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with ``last_hidden_state``.
        tokenizer: Callable returning a mapping that contains
            ``attention_mask`` along with the model inputs.

    Returns:
        A NumPy array with one row per text. An empty list/tuple yields an
        array of shape ``(0, model.config.hidden_size)``.
    """
    # An empty batch has nothing to tokenize; hand back an empty matrix
    if isinstance(texts, (list, tuple)) and not texts:
        return np.empty((0, model.config.hidden_size), dtype=np.float32)

    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
        # (batch, tokens) -> (batch, tokens, 1) so it broadcasts over features
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        summed = (hidden * mask).sum(dim=1)
        # clamp guards against division by zero for an all-padding row
        token_counts = mask.sum(dim=1).clamp(min=1)
        embeddings = (summed / token_counts).numpy()
    return embeddings
# Synthesize speech for a piece of text
def text_to_speech(text, output_file):
    """Render ``text`` as English speech with gTTS and save it.

    Args:
        text: The text to speak.
        output_file: Path the audio is saved to.

    Returns:
        ``output_file``, unchanged, for convenient chaining.
    """
    gTTS(text=text, lang='en').save(output_file)
    return output_file
# Read all PDF files from the specified folder.
# A missing folder is treated as "no documents" so the app can still start;
# os.listdir would otherwise raise at import time.
if os.path.isdir(pdf_folder_path):
    pdf_paths = [
        os.path.join(pdf_folder_path, f)
        # sorted() makes the index order reproducible across runs
        for f in sorted(os.listdir(pdf_folder_path))
        # match the extension case-insensitively (.pdf, .PDF, ...)
        if f.lower().endswith('.pdf')
    ]
else:
    print(f"Warning: PDF folder '{pdf_folder_path}' not found; nothing will be indexed")
    pdf_paths = []
texts = [extract_text_from_pdf(path) for path in pdf_paths]
# Embed PDF texts and add to vector database
if texts:
    # FAISS expects a C-contiguous float32 matrix
    embeddings = np.ascontiguousarray(
        embed_text(texts, model, tokenizer), dtype=np.float32
    )
    index.add(embeddings)
else:
    # Nothing to embed: keep `embeddings` defined and leave the index empty
    embeddings = np.empty((0, dimension), dtype=np.float32)
# Gradio handler: spoken question in, written and spoken reply out
def process_audio(audio_file_path):
    """Answer a spoken question with text and synthesized speech.

    The audio is converted to WAV, transcribed, sent as a single user
    message to the Groq chat model, and the reply is converted to speech.

    Args:
        audio_file_path: Path of the audio supplied by the Gradio input, or
            a falsy value (``None``/``""``) when nothing was submitted.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is ``None``
        when there is nothing to speak (no input, or an empty model reply).
    """
    # Submitting the form without a recording yields no path; answer with a
    # hint instead of crashing in the audio decoder.
    if not audio_file_path:
        return "Please record or upload an audio clip first.", None

    # Convert audio to WAV format if needed
    wav_path = convert_to_wav(audio_file_path)
    # Convert audio to text
    text = audio_to_text(wav_path)

    # audio_to_text signals failure by returning one of these messages.
    # Relay them to the user directly rather than sending them to the model
    # as though the user had said them.
    transcription_failures = (
        "Sorry, I did not understand the audio",
        "Sorry, there was a problem with the request",
    )
    if text in transcription_failures:
        response = text
    else:
        # Generate a response using the Groq API
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": text,
                }
            ],
            model="llama3-8b-8192",
        )
        # Normalize a missing reply to an empty string
        response = chat_completion.choices[0].message.content or ""

    # gTTS cannot synthesize empty text, so skip the audio in that case
    if not response.strip():
        return response, None

    # Convert advice to speech
    output_file = "advice.mp3"
    output_path = text_to_speech(response, output_file)
    return response, output_path
# Gradio UI: one audio input, delivered to process_audio as a file path,
# and two outputs: the reply text and its spoken version.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Advice"),
        gr.Audio(label="Advice Audio"),
    ],
)

# Start the app only when this file is run as a script, not when imported
if __name__ == "__main__":
    iface.launch()