Ayesha931 committed on
Commit 20a391c · verified · 1 Parent(s): 77c5144

Create app.py

Files changed (1)
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
+ import os
+ import speech_recognition as sr
+ import fitz  # PyMuPDF
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+ import faiss
+ import numpy as np
+ from gtts import gTTS
+ from pydub import AudioSegment
+ from groq import Groq
+ from dotenv import load_dotenv
+ import gradio as gr
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ # Initialize Groq API client
+ client = Groq(
+     api_key=os.getenv("GROQ_API_KEY"),
+ )
+
+ # Initialize model and tokenizer for embedding
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+ model = AutoModel.from_pretrained("bert-base-uncased")
+
+ # Initialize vector database
+ dimension = 768  # Size of BERT embeddings
+ index = faiss.IndexFlatL2(dimension)
+
+ # Folder path containing PDFs
+ pdf_folder_path = "pdfsforRAG"
+
+ # Function to convert audio file to text
+ def audio_to_text(audio_file_path):
+     recognizer = sr.Recognizer()
+     with sr.AudioFile(audio_file_path) as source:
+         audio = recognizer.record(source)
+     try:
+         text = recognizer.recognize_google(audio)
+         return text
+     except sr.UnknownValueError:
+         return "Sorry, I did not understand the audio"
+     except sr.RequestError:
+         return "Sorry, there was a problem with the request"
+
+ # Function to convert audio to WAV format
+ def convert_to_wav(audio_file_path):
+     audio = AudioSegment.from_file(audio_file_path)
+     wav_path = "temp_audio.wav"
+     audio.export(wav_path, format="wav")
+     return wav_path
+
+ # Function to extract text from a PDF file
+ def extract_text_from_pdf(pdf_file):
+     text = ""
+     pdf_document = fitz.open(pdf_file)
+     for page_num in range(len(pdf_document)):
+         page = pdf_document.load_page(page_num)
+         text += page.get_text()
+     return text
+
+ # Function to embed text using a transformer model
+ def embed_text(texts, model, tokenizer):
+     inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
+     with torch.no_grad():
+         embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
+     return embeddings
+
+ # Function to convert text to speech
+ def text_to_speech(text, output_file):
+     tts = gTTS(text=text, lang='en')
+     tts.save(output_file)
+     return output_file
+
+ # Read all PDF files from the specified folder
+ pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')]
+
+ texts = []
+ for path in pdf_paths:
+     pdf_text = extract_text_from_pdf(path)
+     texts.append(pdf_text)
+
+ # Embed PDF texts and add to vector database
+ embeddings = embed_text(texts, model, tokenizer)
+ index.add(embeddings)
+
+ # Gradio Interface
+ def process_audio(audio_file_path):
+     # Convert audio to WAV format if needed
+     wav_path = convert_to_wav(audio_file_path)
+
+     # Convert audio to text
+     text = audio_to_text(wav_path)
+
+     # Generate a response using the Groq API
+     chat_completion = client.chat.completions.create(
+         messages=[
+             {
+                 "role": "user",
+                 "content": text,
+             }
+         ],
+         model="llama3-8b-8192",
+     )
+     response = chat_completion.choices[0].message.content
+
+     # Convert advice to speech
+     output_file = "advice.mp3"
+     output_path = text_to_speech(response, output_file)
+
+     return response, output_path
+
+ # Define Gradio interface
+ iface = gr.Interface(
+     fn=process_audio,
+     inputs=gr.Audio(type="filepath"),  # Handle file paths
+     outputs=[gr.Textbox(label="Advice"), gr.Audio(label="Advice Audio")]
+ )
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     iface.launch()
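
Note: the file builds a FAISS index over the PDF embeddings but never queries it inside process_audio, so the Groq prompt currently contains only the transcribed question. Below is a minimal sketch, not part of this commit, of how the retrieval step could be wired in; it reuses the existing embed_text, index, and texts objects, and retrieve_context is a hypothetical helper name.

    # Hypothetical helper (not in this commit): embed the transcribed question with
    # the same BERT model and return the closest stored PDF text(s) as prompt context.
    def retrieve_context(question, k=1):
        query_embedding = embed_text([question], model, tokenizer)  # shape (1, 768), float32
        distances, indices = index.search(query_embedding, k)       # L2 nearest neighbours
        return "\n\n".join(texts[i] for i in indices[0] if i != -1)

Inside process_audio, the user message content could then be built as something like f"Context:\n{retrieve_context(text)}\n\nQuestion: {text}" before the client.chat.completions.create call.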