# NOTE(review): scraper residue (file size, git-blame hashes, line-number gutter)
# was embedded here and would break parsing; converted to this comment.
import streamlit as st
import os
import speech_recognition as sr
import fitz # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
# Function to convert audio file to text
def audio_to_text(audio_file):
    """Transcribe a WAV audio file to text using Google's free recognizer.

    Returns the recognized text, or a human-readable error string when the
    speech is unintelligible or the recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()
    # Record the entire file into an AudioData object first; recognition
    # happens after the file handle is released.
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"
# Function to convert audio to WAV format
def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Convert an audio file to WAV format and return the output path.

    Args:
        audio_file_path: path to the source audio (any format that
            pydub/ffmpeg can decode, e.g. MP3 or WAV).
        wav_path: destination path for the converted file. Defaults to the
            previously hard-coded "temp_audio.wav" so existing callers are
            unaffected; pass a different path to avoid clobbering it.

    Returns:
        The path of the written WAV file (``wav_path``).
    """
    audio = AudioSegment.from_file(audio_file_path)
    audio.export(wav_path, format="wav")
    return wav_path
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in *pdf_file*.

    Args:
        pdf_file: path to a PDF readable by PyMuPDF (fitz).

    Returns:
        A single string with all pages' text in document order.
    """
    # Fix: the original never closed the document, leaking the underlying
    # file handle. fitz.Document is a context manager, so use `with`.
    text = ""
    with fitz.open(pdf_file) as pdf_document:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text += page.get_text()
    return text
# Function to embed text using a transformer model
def embed_text(texts, model, tokenizer):
    """Return mean-pooled hidden-state embeddings for *texts*.

    Tokenizes the batch (padded/truncated, PyTorch tensors), runs the model
    without gradient tracking, and averages the last hidden state over the
    sequence dimension.

    Returns:
        A numpy array of shape (len(texts), hidden_dim).
    """
    encoded = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    # Mean over the token (sequence) axis -> one vector per input text.
    return hidden.mean(dim=1).numpy()
# Function to convert text to speech
def text_to_speech(text, output_file):
    """Render *text* as English speech via gTTS, save to *output_file*.

    Returns:
        The path the MP3 audio was saved to (``output_file``).
    """
    gTTS(text=text, lang='en').save(output_file)
    return output_file
# --- One-time setup: embedding model, vector index, and PDF knowledge base ---

# Initialize model and tokenizer (shared by indexing and query embedding).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database: exact L2 search over BERT-base vectors.
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing PDFs
pdf_folder_path = "pdfsforRAG"

# Read all PDF files from the specified folder and extract their full text.
# (Manual append loop replaced with comprehensions for clarity — PERF401.)
pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')]
texts = [extract_text_from_pdf(path) for path in pdf_paths]

# Embed PDF texts and add to vector database; row i of the index
# corresponds to texts[i], which the search code relies on.
embeddings = embed_text(texts, model, tokenizer)
index.add(embeddings)
# --- Streamlit application: voice question in, spoken advice out ---
st.title("Parenting Guide App")

# Upload an audio file
uploaded = st.file_uploader("Record and upload your audio file (WAV/MP3)", type=["wav", "mp3"])
if uploaded:
    st.write("Processing...")
    # Persist the upload so pydub / speech_recognition can read it from disk.
    with open("temp_audio.mp3", "wb") as handle:
        handle.write(uploaded.getbuffer())
    # Normalize to WAV, then transcribe the spoken question.
    query_text = audio_to_text(convert_to_wav("temp_audio.mp3"))
    st.write("Voice command:", query_text)
    # Retrieve the single most similar PDF document from the FAISS index.
    query_embedding = embed_text([query_text], model, tokenizer)
    distances, neighbors = index.search(query_embedding, k=1)
    advice = texts[neighbors[0][0]]
    st.write("Advice:", advice)
    # Speak the advice back to the user as an MP3 audio player.
    st.audio(text_to_speech(advice, "advice.mp3"))