# Page header captured with the source ("Spaces: Sleeping"); not part of the program.
import gradio as gr | |
from utils import update_db_hub | |
from preprocessing import read_file | |
import datetime | |
def process_file(files, topic):
    """
    Read every uploaded file, collect its text, and save the batch to the database.

    For each entry in `files` the text is extracted with `read_file`. The text,
    the given `topic`, and an ISO-format timestamp taken at processing time are
    collected into three parallel lists. Once every file has been attempted,
    the lists are handed to `update_db_hub` in a single call. A failure on one
    file is recorded in the returned log and does not stop the remaining files.

    Parameters:
    ----------
    files : list or None
        The uploaded files. Each entry is either an object exposing its path
        as `.name` or the path itself. `None` or an empty list means nothing
        was uploaded.
    topic : list or str
        Topic(s) associated with the upload. The value is stored unchanged
        next to every file's text (it is not split or normalised here).

    Returns:
    -------
    str
        One status line per file, joined with newlines, or a short notice
        when no files were uploaded.

    Raises:
    ------
    Exception
        Anything raised by `update_db_hub` is propagated to the caller.
    """
    if not files:
        # The file input may deliver None (or an empty list) when nothing was
        # uploaded; there is nothing to read and nothing to save.
        return "No files were uploaded."

    texts = []
    topics = []
    dates = []
    log_history = []  # One status line per file, in upload order.

    for file in files:
        # Resolve the path before the try block so the error message below
        # always names the file that actually failed.
        file_path = getattr(file, "name", file)
        try:
            text = read_file(file_path)
            print(f"for file {file_path}", text[:1000])
        except Exception as e:
            # Best-effort per file: log the failure and keep going.
            log_history.append(f"Error processing for file {file_path}: {str(e)}")
            continue

        texts.append(text)
        topics.append(topic)
        dates.append(datetime.datetime.now().isoformat())
        log_history.append(f"File {file_path} processed successfully! file saved to the database.")

    # Skip the database call when every file failed; there is nothing to store.
    if texts:
        print("save in db")
        update_db_hub(texts, topics, dates)
        print('saved')

    return "\n".join(log_history)
# Define Gradio interface: a file picker and a topic box feed `process_file`,
# whose returned log is shown in the "Status" textbox.
# NOTE(review): the original indentation was lost; the Row is assumed to hold
# only the two inputs, with the button and status box below it - confirm.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Upload Interface")
    with gr.Row():
        file_input = gr.File(label="Upload File (.docx or .txt or .pdf)", file_count="multiple")
        topic_input = gr.Textbox(label="Topics (comma-separated)", placeholder="e.g., science, technology, law, medicin")
    submit_button = gr.Button("Upload and Process")
    output_text = gr.Textbox(label="Status")
    submit_button.click(process_file, inputs=[file_input, topic_input], outputs=output_text)

# Launch the app only when this file is executed as a script, so importing the
# module (e.g. to reuse `process_file` or `demo`) does not start a server.
if __name__ == "__main__":
    demo.launch()