Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -25,31 +25,157 @@ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
 
 # Define available languages for TTS
 AVAILABLE_LANGUAGES = [
-    ("en", "English"),
-    ("ar", "Arabic"),
-    ("de", "German"),
-    ("mr", "Marathi"),
-    ("kn", "Kannada"),
-    ("tl", "Filipino (Tagalog)"),
-    ("fr", "French"),
-    ("gu", "Gujarati"),
-    ("hi", "Hindi"),
-    ("ml", "Malayalam"),
-    ("ta", "Tamil"),
-    ("te", "Telugu"),
-    ("ur", "Urdu"),
-    ("si", "Sinhala")
+    "English", "Arabic", "German", "Marathi", "Kannada",
+    "Filipino (Tagalog)", "French", "Gujarati", "Hindi",
+    "Malayalam", "Tamil", "Telugu", "Urdu", "Sinhala"
 ]
 
+LANGUAGE_CODES = {
+    "English": "en", "Arabic": "ar", "German": "de",
+    "Marathi": "mr", "Kannada": "kn", "Filipino (Tagalog)": "tl",
+    "French": "fr", "Gujarati": "gu", "Hindi": "hi",
+    "Malayalam": "ml", "Tamil": "ta", "Telugu": "te",
+    "Urdu": "ur", "Sinhala": "si"
+}
+
 # Get available languages for OCR
 try:
     langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
 except:
     langs = ['eng']  # Fallback to English if tesseract isn't properly configured
 
-
+def create_temp_dir():
+    """Create temporary directory if it doesn't exist"""
+    temp_dir = os.path.join(os.getcwd(), 'temp')
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+    return temp_dir
+
+def preprocess_image(image_path):
+    """Preprocess the image for better OCR results"""
+    img = cv2.imread(image_path)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    gray = cv2.equalizeHist(gray)
+    gray = cv2.GaussianBlur(gray, (5, 5), 0)
+    processed_image = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                            cv2.THRESH_BINARY, 11, 2)
+    temp_dir = create_temp_dir()
+    temp_filename = os.path.join(temp_dir, "processed_image.png")
+    cv2.imwrite(temp_filename, processed_image)
+    return temp_filename
+
+def extract_text_from_image(image_path, lang='eng'):
+    """Extract text from image using OCR"""
+    processed_image_path = preprocess_image(image_path)
+    text = pytesseract.image_to_string(Image.open(processed_image_path), lang=lang)
+    try:
+        os.remove(processed_image_path)
+    except:
+        pass
+    return text
+
+def extract_text_from_pdf(pdf_path, lang='eng'):
+    """Extract text from PDF file"""
+    text = ""
+    temp_dir = create_temp_dir()
+    try:
+        with open(pdf_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            for page_num in range(len(pdf_reader.pages)):
+                page = pdf_reader.pages[page_num]
+                page_text = page.extract_text()
+                if page_text.strip():
+                    text += page_text
+                else:
+                    images = convert_from_path(pdf_path, first_page=page_num + 1, last_page=page_num + 1)
+                    for image in images:
+                        temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}.png')
+                        image.save(temp_image_path, 'PNG')
+                        text += extract_text_from_image(temp_image_path, lang=lang)
+                        text += f"\n[OCR applied on page {page_num + 1}]\n"
+                        try:
+                            os.remove(temp_image_path)
+                        except:
+                            pass
+    except Exception as e:
+        return f"Error processing PDF: {str(e)}"
+    return text
+
+def extract_text(file_path, lang='eng'):
+    """Extract text from uploaded file"""
+    file_ext = file_path.lower().split('.')[-1]
+    if file_ext in ['pdf']:
+        return extract_text_from_pdf(file_path, lang)
+    elif file_ext in ['png', 'jpg', 'jpeg']:
+        return extract_text_from_image(file_path, lang)
+    else:
+        return f"Unsupported file type: {file_ext}"
+
+def process_upload(api_key, files, lang):
+    """Process uploaded files and create vector index"""
+    global vector_index
+
+    if not api_key:
+        return "Please provide a valid OpenAI API Key."
+
+    if not files:
+        return "No files uploaded."
+
+    documents = []
+    error_messages = []
+    image_heavy_docs = []
+
+    for file_path in files:
+        try:
+            text = extract_text(file_path, lang)
+            if text.strip():  # Only add non-empty documents
+                documents.append(Document(text=text))
+            else:
+                error_messages.append(f"No text extracted from {os.path.basename(file_path)}")
+        except Exception as e:
+            error_message = f"Error processing file {os.path.basename(file_path)}: {str(e)}"
+            logging.error(error_message)
+            error_messages.append(error_message)
+
+    if documents:
+        try:
+            embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=api_key)
+            vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
+
+            success_message = f"Successfully indexed {len(documents)} files."
+            if error_messages:
+                success_message += f"\nErrors: {'; '.join(error_messages)}"
+
+            return success_message
+        except Exception as e:
+            return f"Error creating index: {str(e)}"
+    else:
+        return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"
+
+def query_app(query, model_name, use_similarity_check, api_key):
+    """Process query and return response"""
+    global vector_index, query_log
+
+    if vector_index is None:
+        return "No documents indexed yet. Please upload documents first."
+
+    if not api_key:
+        return "Please provide a valid OpenAI API Key."
+
+    try:
+        llm = OpenAI(model=model_name, api_key=api_key)
+        response_synthesizer = get_response_synthesizer(llm=llm)
+        query_engine = vector_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
+        response = query_engine.query(query)
+
+        return response.response
+
+    except Exception as e:
+        logging.error(f"Error during query processing: {e}")
+        return f"Error during query processing: {str(e)}"
 
 def create_gradio_interface():
+    """Create and configure the Gradio interface"""
     with gr.Blocks(title="Document Processing and TTS App") as demo:
         gr.Markdown("# 📄 Document Processing, Text & Audio Generation App")
 
@@ -89,10 +215,9 @@ def create_gradio_interface():
                     label="Voice Speed"
                 )
                 language = gr.Dropdown(
-                    choices=
+                    choices=AVAILABLE_LANGUAGES,
                     label="Language for Audio and Script",
-                    value="
-                    type="value"
+                    value="English"
                 )
                 output_option = gr.Radio(
                     choices=["audio", "script_text", "both"],
@@ -122,9 +247,15 @@ def create_gradio_interface():
             inputs=[answer_output],
             outputs=[text_input]
         )
+
+        def process_generation(*args):
+            args = list(args)
+            # Convert language name to code
+            args[5] = LANGUAGE_CODES[args[5]]
+            return generate_audio_and_text(*args)
 
         generate_button.click(
-            fn=generate_audio_and_text,
+            fn=process_generation,
             inputs=[
                 api_key_input, text_input, model_dropdown, voice_type,
                 voice_speed, language, output_option
@@ -136,6 +267,4 @@ def create_gradio_interface():
 
 if __name__ == "__main__":
     demo = create_gradio_interface()
-    demo.launch()
-else:
-    demo = create_gradio_interface()
+    demo.launch()
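The click handler added in this commit converts the dropdown's language name to a code via LANGUAGE_CODES before calling generate_audio_and_text, so every entry in AVAILABLE_LANGUAGES needs a matching key or the lookup raises KeyError at generation time. A minimal standalone sanity check (not part of app.py, just an illustration built from the two constants in the diff):

# Sanity check: every dropdown entry needs a language code,
# otherwise process_generation() fails with KeyError on click.
AVAILABLE_LANGUAGES = [
    "English", "Arabic", "German", "Marathi", "Kannada",
    "Filipino (Tagalog)", "French", "Gujarati", "Hindi",
    "Malayalam", "Tamil", "Telugu", "Urdu", "Sinhala"
]
LANGUAGE_CODES = {
    "English": "en", "Arabic": "ar", "German": "de",
    "Marathi": "mr", "Kannada": "kn", "Filipino (Tagalog)": "tl",
    "French": "fr", "Gujarati": "gu", "Hindi": "hi",
    "Malayalam": "ml", "Tamil": "ta", "Telugu": "te",
    "Urdu": "ur", "Sinhala": "si"
}

missing = [name for name in AVAILABLE_LANGUAGES if name not in LANGUAGE_CODES]
assert not missing, f"Dropdown entries without a language code: {missing}"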
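The new OCR helpers also depend on system binaries the Space must provide: pytesseract needs the tesseract executable (plus any non-English language packs) and pdf2image needs poppler. A minimal sketch that mirrors the preprocess_image() and extract_text_from_image() path for a single image, runnable outside the app under those assumptions; the file name is only a placeholder:

# Standalone sketch mirroring preprocess_image() + extract_text_from_image().
# Assumes opencv-python, pillow, pytesseract and a system tesseract install;
# "sample_scan.png" is a placeholder path.
import cv2
import pytesseract
from PIL import Image

def ocr_check(image_path, lang="eng"):
    img = cv2.imread(image_path)                  # load as BGR
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # single channel
    gray = cv2.equalizeHist(gray)                 # stretch contrast
    gray = cv2.GaussianBlur(gray, (5, 5), 0)      # denoise before thresholding
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # OCR the thresholded array directly instead of writing a temp file
    return pytesseract.image_to_string(Image.fromarray(binary), lang=lang)

if __name__ == "__main__":
    print(ocr_check("sample_scan.png"))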