# NOTE(review): scraper residue (file size, git-blame hashes, line-number gutter)
# was embedded here and would break parsing; converted to this comment.
import streamlit as st
import os
import speech_recognition as sr
import fitz # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
# Function to convert audio file to text
def audio_to_text(audio_file):
    """Transcribe a WAV audio file to text using Google's free recognizer.

    Returns the recognized text, or a human-readable error string when the
    speech is unintelligible or the recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()
    # Record the entire file into an AudioData object first; recognition
    # happens after the file handle is released.
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I did not understand the audio"
    except sr.RequestError:
        return "Sorry, there was a problem with the request"
# Function to convert audio to WAV format
def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Convert an audio file to WAV format and return the output path.

    Args:
        audio_file_path: path to the source audio (any format that
            pydub/ffmpeg can decode, e.g. MP3 or WAV).
        wav_path: destination path for the converted file. Defaults to the
            previously hard-coded "temp_audio.wav" so existing callers are
            unaffected; pass a different path to avoid clobbering it.

    Returns:
        The path of the written WAV file (``wav_path``).
    """
    audio = AudioSegment.from_file(audio_file_path)
    audio.export(wav_path, format="wav")
    return wav_path
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in *pdf_file*.

    Args:
        pdf_file: path to a PDF readable by PyMuPDF (fitz).

    Returns:
        A single string with all pages' text in document order.
    """
    # Fix: the original never closed the document, leaking the underlying
    # file handle. fitz.Document is a context manager, so use `with`.
    text = ""
    with fitz.open(pdf_file) as pdf_document:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text += page.get_text()
    return text
# Function to embed text using a transformer model
def embed_text(texts, model, tokenizer):
    """Return mean-pooled hidden-state embeddings for *texts*.

    Tokenizes the batch (padded/truncated, PyTorch tensors), runs the model
    without gradient tracking, and averages the last hidden state over the
    sequence dimension.

    Returns:
        A numpy array of shape (len(texts), hidden_dim).
    """
    encoded = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    # Mean over the token (sequence) axis -> one vector per input text.
    return hidden.mean(dim=1).numpy()
# Function to convert text to speech
def text_to_speech(text, output_file):
    """Render *text* as English speech via gTTS, save to *output_file*.

    Returns:
        The path the MP3 audio was saved to (``output_file``).
    """
    gTTS(text=text, lang='en').save(output_file)
    return output_file
# --- One-time setup: embedding model, vector index, and PDF knowledge base ---

# Initialize model and tokenizer (shared by indexing and query embedding).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database: exact L2 search over BERT-base vectors.
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing PDFs
pdf_folder_path = "pdfsforRAG"

# Read all PDF files from the specified folder and extract their full text.
# (Manual append loop replaced with comprehensions for clarity — PERF401.)
pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')]
texts = [extract_text_from_pdf(path) for path in pdf_paths]

# Embed PDF texts and add to vector database; row i of the index
# corresponds to texts[i], which the search code relies on.
embeddings = embed_text(texts, model, tokenizer)
index.add(embeddings)
# --- Streamlit application: voice question in, spoken advice out ---
st.title("Parenting Guide App")

# Upload an audio file
uploaded = st.file_uploader("Record and upload your audio file (WAV/MP3)", type=["wav", "mp3"])
if uploaded:
    st.write("Processing...")
    # Persist the upload so pydub / speech_recognition can read it from disk.
    with open("temp_audio.mp3", "wb") as handle:
        handle.write(uploaded.getbuffer())
    # Normalize to WAV, then transcribe the spoken question.
    query_text = audio_to_text(convert_to_wav("temp_audio.mp3"))
    st.write("Voice command:", query_text)
    # Retrieve the single most similar PDF document from the FAISS index.
    query_embedding = embed_text([query_text], model, tokenizer)
    distances, neighbors = index.search(query_embedding, k=1)
    advice = texts[neighbors[0][0]]
    st.write("Advice:", advice)
    # Speak the advice back to the user as an MP3 audio player.
    st.audio(text_to_speech(advice, "advice.mp3"))