|
import streamlit as st |
|
import os |
|
import speech_recognition as sr |
|
import fitz |
|
from transformers import AutoTokenizer, AutoModel |
|
import torch |
|
import faiss |
|
import numpy as np |
|
from gtts import gTTS |
|
from pydub import AudioSegment |
|
|
|
|
|
def audio_to_text(audio_file):
    """Transcribe an audio file to text with ``recognize_google``.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile``.

    Returns:
        The transcript on success. On failure an apology sentence is returned
        instead of raising: one for unintelligible audio
        (``sr.UnknownValueError``) and one for a failed recognition request
        (``sr.RequestError``).
    """
    engine = sr.Recognizer()

    # Read the whole clip into memory before asking for a transcription.
    with sr.AudioFile(audio_file) as clip:
        captured = engine.record(clip)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        outcome = "Sorry, I did not understand the audio"
    except sr.RequestError:
        outcome = "Sorry, there was a problem with the request"
    else:
        outcome = transcript
    return outcome
|
|
|
|
|
def convert_to_wav(audio_file_path, wav_path="temp_audio.wav"):
    """Convert an audio file to WAV and return the path of the WAV file.

    Args:
        audio_file_path: Path of the source audio, loaded with
            ``AudioSegment.from_file``.
        wav_path: Destination of the converted file. Defaults to
            ``"temp_audio.wav"`` so existing one-argument callers behave as
            before; pass a different path to avoid overwriting that file.

    Returns:
        ``wav_path``, for convenient chaining.
    """
    audio = AudioSegment.from_file(audio_file_path)
    # export() hands back the file object it wrote to; close it explicitly so
    # the data is flushed and the handle released before anyone reads the file.
    exported = audio.export(wav_path, format="wav")
    exported.close()
    return wav_path
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: PDF location, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output with no
        separator inserted between pages.
    """
    # The context manager guarantees the document is closed, even when a page
    # fails to parse part-way through.
    with fitz.open(pdf_file) as pdf_document:
        # join() builds the result in one pass instead of repeated string +=.
        return "".join(page.get_text() for page in pdf_document)
|
|
|
|
|
def embed_text(texts, model, tokenizer):
    """Embed text by mean-pooling the model's final hidden states.

    Padding positions are excluded from the mean, so the vector produced for a
    text does not depend on how long the other texts in the same batch are.

    Args:
        texts: A string or list of strings to embed.
        model: Callable that accepts the tokenizer output as keyword arguments
            and returns an object exposing ``last_hidden_state``
            (batch, tokens, hidden).
        tokenizer: Callable returning a mapping of tensors for ``texts``. When
            the mapping contains ``attention_mask`` it is used to ignore
            padding; otherwise every position is averaged.

    Returns:
        A float32 NumPy array with one row per input text.
    """
    inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)

    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
        mask = inputs["attention_mask"] if "attention_mask" in inputs else None

        if mask is None:
            pooled = hidden.mean(dim=1)
        else:
            # Zero out padded positions, then divide by the number of real
            # tokens per row (clamped so an all-padding row cannot divide by 0).
            weights = mask.unsqueeze(-1).to(hidden.dtype)
            token_counts = weights.sum(dim=1).clamp(min=1)
            pooled = (hidden * weights).sum(dim=1) / token_counts

    # FAISS indexes only accept float32 vectors.
    return pooled.float().numpy()
|
|
|
|
|
def text_to_speech(text, output_file, lang='en'):
    """Synthesize speech for *text* with gTTS and save it to *output_file*.

    Args:
        text: The text to speak.
        output_file: Path the audio is written to via ``gTTS.save``.
        lang: Language code handed to gTTS. Defaults to ``'en'``, which is
            what two-argument callers have always received.

    Returns:
        ``output_file``, so the result can be passed straight to a player.
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(output_file)
    return output_file
|
|
|
|
|
# Hidden-state width of bert-base-uncased; every vector stored in the FAISS
# index must have exactly this many components.
dimension = 768

# Folder scanned (non-recursively) for the PDFs that make up the advice corpus.
pdf_folder_path = "pdfsforRAG"


@st.cache_resource
def _load_encoder(model_name="bert-base-uncased"):
    """Load the tokenizer/encoder pair once and reuse it across reruns."""
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModel.from_pretrained(model_name),
    )


@st.cache_resource
def _build_corpus(folder, vector_width):
    """Read the PDFs in *folder* and index one embedding per document.

    Returns:
        ``(paths, documents, flat_index)`` where row ``i`` of ``flat_index``
        is the embedding of ``documents[i]``, extracted from ``paths[i]``.
        A missing folder or a folder without usable PDFs yields empty lists
        and an empty index rather than an exception.
    """
    encoder_tokenizer, encoder = _load_encoder()

    # Sorted for a deterministic document order; os.listdir order is arbitrary.
    names = sorted(os.listdir(folder)) if os.path.isdir(folder) else []

    paths, documents = [], []
    for name in names:
        if not name.lower().endswith(".pdf"):
            continue
        path = os.path.join(folder, name)
        content = extract_text_from_pdf(path)
        # PDFs with no extractable text cannot be matched or read aloud.
        if content.strip():
            paths.append(path)
            documents.append(content)

    flat_index = faiss.IndexFlatL2(vector_width)
    if documents:
        vectors = embed_text(documents, encoder, encoder_tokenizer)
        # FAISS requires C-contiguous float32 input.
        flat_index.add(np.ascontiguousarray(vectors, dtype=np.float32))
    return paths, documents, flat_index


# Streamlit re-executes this script on every interaction; the cached helpers
# above keep the model load and the PDF embedding pass from being repeated.
tokenizer, model = _load_encoder()
pdf_paths, texts, index = _build_corpus(pdf_folder_path, dimension)

if not texts:
    # Nothing to retrieve from: tell the user and halt this run instead of
    # failing later with an opaque traceback.
    st.error(f"No readable PDF documents were found in '{pdf_folder_path}'.")
    st.stop()
|
|
|
|
|
st.title("Parenting Guide App")

audio_file = st.file_uploader("Record and upload your audio file (WAV/MP3)", type=["wav", "mp3"])

# Sentences audio_to_text returns (instead of raising) when transcription
# fails. They must never be treated as a user query.
_TRANSCRIPTION_FAILURES = (
    "Sorry, I did not understand the audio",
    "Sorry, there was a problem with the request",
)

if audio_file:
    st.write("Processing...")

    upload_path = "temp_audio.mp3"
    wav_path = None
    try:
        # Persist the upload so it can be converted and transcribed from disk.
        with open(upload_path, "wb") as f:
            f.write(audio_file.getbuffer())

        wav_path = convert_to_wav(upload_path)
        text = audio_to_text(wav_path)
    finally:
        # Best-effort cleanup: the recording is not needed once transcribed.
        for leftover in (upload_path, wav_path):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError:
                    pass

    if text in _TRANSCRIPTION_FAILURES:
        # Show the problem rather than searching for advice that matches an
        # apology sentence.
        st.error(text)
    else:
        st.write("Voice command:", text)

        query_embedding = embed_text([text], model, tokenizer)
        distances, neighbours = index.search(query_embedding, k=1)
        best_match = int(neighbours[0][0])

        if best_match < 0:
            # FAISS reports -1 when it has no neighbour to return; indexing
            # texts with it would silently pick the last document.
            st.warning("No advice documents are available to search.")
        else:
            closest_text = texts[best_match]
            st.write("Advice:", closest_text)

            output_file = "advice.mp3"
            output_path = text_to_speech(closest_text, output_file)
            st.audio(output_path)
|
|