File size: 1,423 Bytes
97579c6 5017c6b 97579c6 5017c6b 97579c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import json
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
# Specify the model name
model_name = "Canstralian/RabbitRedux"


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for the model called *name*.

    Streamlit re-executes the script from the top on every user interaction.
    Wrapping the construction in ``st.cache_resource`` lets reruns reuse the
    already-built pipeline instead of constructing it again each time.

    Args:
        name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    return pipeline("text2text-generation", model=name)


# Initialize the pipeline (reused from Streamlit's resource cache on reruns)
nlp_pipeline = _load_pipeline(model_name)

# Example usage
# NOTE(review): this demo call runs one inference and prints to stdout every
# time the script executes, including each Streamlit rerun. It is kept to
# preserve existing behavior; consider removing it or moving it behind a
# `if __name__ == "__main__":` style guard outside the app.
input_text = "Provide an example of secure Python coding practices."
output = nlp_pipeline(input_text)
print(output)
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Passed straight to ``PdfReader``; the app hands in the object
            returned by ``st.file_uploader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        between pages. Pages that yield no text contribute an empty string.
    """
    reader = PdfReader(file)
    # join builds the result in one pass instead of growing a string with +=;
    # `or ""` keeps a page whose extraction yields None/empty from raising.
    return "".join(page.extract_text() or "" for page in reader.pages)
def convert_to_json(text):
    """Run *text* through the module-level Hugging Face pipeline.

    Args:
        text: Input string handed to ``nlp_pipeline``.

    Returns:
        The ``'generated_text'`` value of the first item the pipeline returns.
        NOTE(review): nothing here checks that this string is valid JSON;
        callers must validate it before parsing.
    """
    generations = nlp_pipeline(text)
    first_generation = generations[0]
    return first_generation['generated_text']
# Streamlit page: upload a PDF, extract its text, ask the model to convert it
# to JSON, then show the result and offer it as a download.
st.title("PDF to JSON Converter")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    st.write("Processing your file...")
    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)
    if not pdf_text.strip():
        # Nothing to convert; skip the model call rather than feed it an
        # empty prompt.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)
        try:
            # The model returns free-form text, so it may not parse as JSON.
            parsed_output = json.loads(json_output)
        except json.JSONDecodeError as err:
            # Report the problem and show the raw text instead of crashing
            # the app; no download is offered for unparseable output.
            st.error(f"The model output is not valid JSON: {err}")
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_output)
            # Provide a download link for the JSON file.
            # Swap only the final extension, whatever its letter case;
            # str.replace(".pdf", ...) missed ".PDF" and rewrote every
            # ".pdf" occurrence inside the name.
            json_filename = uploaded_file.name.rsplit(".", 1)[0] + ".json"
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )
|