# ParentHug / app.py
# Ayesha931's picture
# Update app.py
# 1f14237 verified
import streamlit as st
import os
import speech_recognition as sr
import fitz # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
# Function to convert audio file to text
def audio_to_text(audio_file):
    """Transcribe an audio file to text with ``recognize_google``.

    Args:
        audio_file: Path or file-like object accepted by ``sr.AudioFile``.

    Returns:
        The recognized transcript. If recognition fails, one of two fixed
        apology strings is returned instead of raising: one for
        unintelligible audio (``sr.UnknownValueError``) and one for a failed
        request (``sr.RequestError``).
    """
    engine = sr.Recognizer()

    # Read the whole clip into memory before handing it to the recognizer.
    with sr.AudioFile(audio_file) as clip:
        recording = engine.record(clip)

    try:
        transcript = engine.recognize_google(recording)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"
    return transcript
# Function to convert audio to WAV format
def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Re-encode an audio file as WAV and return the path of the result.

    Args:
        audio_file_path: Path of the source audio file, in any format that
            ``AudioSegment.from_file`` can read.
        wav_path: Destination path for the WAV file. Defaults to
            ``"temp_audio.wav"`` in the current working directory, matching
            the previous hard-coded behaviour. Pass a unique path when
            several conversions may run at the same time, otherwise they
            overwrite each other's output.

    Returns:
        ``wav_path``, i.e. the location the WAV data was written to.
    """
    audio = AudioSegment.from_file(audio_file_path)
    audio.export(wav_path, format="wav")
    return wav_path
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A single string with the text of all pages in page order. The string
        is empty when the document has no pages or no extractable text.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_file) as pdf_document:
        # join() avoids the repeated reallocation of building with "+=".
        return "".join(page.get_text() for page in pdf_document)
# Function to embed text using a transformer model
def embed_text(texts, model, tokenizer):
    """Embed texts as mean-pooled final hidden states of a transformer.

    Args:
        texts: A string or list of strings to embed.
        model: Callable taking the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        tokenizer: Callable returning a mapping that contains at least
            ``attention_mask``. It is called with ``return_tensors='pt'``,
            ``truncation=True`` and ``padding=True``, so over-long inputs are
            truncated and shorter ones padded to a common length.

    Returns:
        A float32 NumPy array with one embedding row per input text.
    """
    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state

    # Average only over real tokens. A plain mean over the sequence axis also
    # averages the padding positions, which makes a text's embedding depend on
    # how long the other texts in the same batch happen to be.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_sum = (hidden * mask).sum(dim=1)
    token_count = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    pooled = token_sum / token_count

    return pooled.float().cpu().numpy()
# Function to convert text to speech
def text_to_speech(text, output_file):
    """Synthesize English speech for ``text`` and save it to ``output_file``.

    Args:
        text: The text to be spoken.
        output_file: Path the generated audio is written to.

    Returns:
        ``output_file``, unchanged, so the call can be used inline.
    """
    speech = gTTS(text=text, lang='en')
    speech.save(output_file)
    return output_file
# Initialize model and tokenizer.
# Streamlit re-executes this script on every user interaction, so the
# expensive loads below are wrapped in st.cache_resource (Streamlit >= 1.18)
# and run once per server process instead of once per rerun.
@st.cache_resource
def _load_model_and_tokenizer():
    """Load the BERT tokenizer and encoder used for every embedding."""
    return (
        AutoTokenizer.from_pretrained("bert-base-uncased"),
        AutoModel.from_pretrained("bert-base-uncased"),
    )


tokenizer, model = _load_model_and_tokenizer()

# Initialize vector database
dimension = 768  # Size of BERT embeddings

# Folder path containing PDFs
pdf_folder_path = "pdfsforRAG"


@st.cache_resource
def _build_pdf_index(folder):
    """Read every PDF in ``folder``, embed its text and index the embeddings.

    The result is cached, so PDFs added to the folder afterwards are only
    picked up when the cache is cleared or the app restarts.

    Returns:
        A tuple ``(pdf_paths, texts, embeddings, index)`` where position ``i``
        in each of the first three corresponds to vector ``i`` in ``index``.
    """
    # Sort for a deterministic order; match the extension case-insensitively
    # so files named "*.PDF" are not silently skipped.
    pdf_paths = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(".pdf")
    )
    texts = [extract_text_from_pdf(path) for path in pdf_paths]

    index = faiss.IndexFlatL2(dimension)
    if texts:
        # Embed PDF texts and add to vector database
        embeddings = embed_text(texts, model, tokenizer)
        index.add(embeddings)
    else:
        # Nothing to embed: keep an empty index rather than passing an empty
        # batch to the tokenizer.
        embeddings = np.empty((0, dimension), dtype=np.float32)
    return pdf_paths, texts, embeddings, index


# Read all PDF files from the specified folder and build the index
pdf_paths, texts, embeddings, index = _build_pdf_index(pdf_folder_path)
# Streamlit application
st.title("Parenting Guide App")

# Strings audio_to_text() returns in place of a transcript when recognition
# fails. They must not be used as a search query.
_RECOGNITION_ERRORS = (
    "Sorry, I did not understand the audio",
    "Sorry, there was a problem with the request",
)

# Upload an audio file
audio_file = st.file_uploader("Record and upload your audio file (WAV/MP3)", type=["wav", "mp3"])
if audio_file:
    st.write("Processing...")

    # Save the uploaded audio file so it can be read from disk by path.
    with open("temp_audio.mp3", "wb") as f:
        f.write(audio_file.getbuffer())

    # Convert audio to WAV format if needed
    wav_path = convert_to_wav("temp_audio.mp3")

    # Convert audio to text
    text = audio_to_text(wav_path)

    if text in _RECOGNITION_ERRORS:
        # Recognition failed: report it instead of searching for advice that
        # matches the apology message.
        st.error(text)
    elif index.ntotal == 0:
        # With an empty index the search returns -1, which would silently
        # index texts[-1] (or raise on an empty list).
        st.warning("No advice documents are available to search.")
    else:
        st.write("Voice command:", text)

        # Find relevant advice
        query_embedding = embed_text([text], model, tokenizer)
        _, neighbours = index.search(query_embedding, k=1)  # Most similar advice
        closest_text = texts[int(neighbours[0][0])]
        st.write("Advice:", closest_text)

        # Convert advice to speech; skip when there is nothing to speak
        # (e.g. a PDF with no extractable text).
        if closest_text.strip():
            output_file = "advice.mp3"
            output_path = text_to_speech(closest_text, output_file)
            st.audio(output_path)